rm(list=ls());gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 430451 23.0     910535 48.7   638648 34.2
## Vcells 803034  6.2    8388608 64.0  1631747 12.5
if(!require(googledrive)){install.packages("googledrive")}
## Loading required package: googledrive
## Warning: package 'googledrive' was built under R version 4.0.3
if(!require(here)){install.packages("here")}
## Loading required package: here
## Warning: package 'here' was built under R version 4.0.3
## here() starts at C:/Users/andre/Google Drive/DOCUMENTOS/6.TESIS 2018/LCA NAQ
try(drive_download(as_id("1Tz6kjUfM3rvQwaAMoCoRXwOa8-y2bqAS")));1
## Using an auto-discovered, cached token.
## To suppress this message, modify your code or options to clearly consent to the use of a cached token.
## See gargle's "Non-interactive auth" vignette for more details:
## https://gargle.r-lib.org/articles/non-interactive-auth.html
## The googledrive package is using a cached token for gonzalez.santacruz.andres@gmail.com.
## Error : Path exists and overwrite is FALSE
## [1] 1
load(paste0(here::here(),"/BD_2020_for_svychisq.Rdata"))
options(OutDec= ".")
no_cores <- parallel::detectCores() - 1
cl<-parallel::makeCluster(no_cores)
doParallel::registerDoParallel(cl)

if(isTRUE(getOption('knitr.in.progress'))==T){
    clus_iter=1000
} else {
  input <- readline('¿Are you gonna run the dataset with the whole iterations? (Si/No): ')
  if(input=="Si"){
    clus_iter=1000
  } else {
    clus_iter=5
  }
}
local({r <- getOption("repos")
       r["CRAN"] <- "http://cran.r-project.org" 
       options(repos=r)
})

if(!require(visreg)){install.packages("visreg")}
## Loading required package: visreg
if(!require(qdapTools)){install.packages("qdapTools")}
## Loading required package: qdapTools
if(!require(psych)){install.packages("psych")}
## Loading required package: psych
## 
## Attaching package: 'psych'
## The following object is masked from 'package:qdapTools':
## 
##     lookup
if(!require(car)){install.packages("car")}
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
if(!require(binda)){install.packages("binda")}
## Loading required package: binda
## Loading required package: entropy
if(!require(data.table)){install.packages("data.table")}
## Loading required package: data.table
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:qdapTools':
## 
##     shift
if(!require(remotes)){install.packages("remotes")}
## Loading required package: remotes
if(!require(Epi)){install.packages("Epi")}
## Loading required package: Epi
if(!require(reshape2)){install.packages("reshape2")}
## Loading required package: reshape2
## 
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
if(!require(plyr)){install.packages("plyr")}
## Loading required package: plyr
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:qdapTools':
## 
##     id
## The following object is masked from 'package:here':
## 
##     here
if(!require(Weighted.Desc.Stat)){install.packages("Weighted.Desc.Stat")}
## Loading required package: Weighted.Desc.Stat
#if(!require(poLCA)){githubinstall::gh_install_packages("poLCA", ref = github_pull("14"))}
if(!require(ggplot2)){install.packages("ggplot2")}
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
if(!require(ggparallel)){install.packages("ggparallel")}
## Loading required package: ggparallel
if(!require(igraph)){install.packages("igraph")}
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
if(!require(tidyr)){install.packages("tidyr")}
## Loading required package: tidyr
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:igraph':
## 
##     crossing
## The following object is masked from 'package:reshape2':
## 
##     smiths
if(!require(knitr)){install.packages("knitr")}
## Loading required package: knitr
if(!require(broom)){install.packages("broom")}
## Loading required package: broom
if(!require(survey)){install.packages("survey")}
## Loading required package: survey
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
if(!require(dplyr)){install.packages("dplyr")}
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following object is masked from 'package:car':
## 
##     recode
## The following object is masked from 'package:qdapTools':
## 
##     id
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
if(!require(DiagrammeR)){install.packages("DiagrammeR")}
## Loading required package: DiagrammeR
if(!require(plotly)){install.packages("plotly")}
## Loading required package: plotly
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:igraph':
## 
##     groups
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
if(!require(tidyr)){install.packages("tidyr")}
if(!require(tidylog)){install.packages("tidylog")}
## Loading required package: tidylog
## 
## Attaching package: 'tidylog'
## The following objects are masked from 'package:plotly':
## 
##     distinct, filter, group_by, mutate, rename, select, slice,
##     summarise, transmute, ungroup
## The following objects are masked from 'package:dplyr':
## 
##     add_count, add_tally, anti_join, count, distinct, distinct_all,
##     distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
##     full_join, group_by, group_by_all, group_by_at, group_by_if,
##     inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
##     relocate, rename, rename_all, rename_at, rename_if, rename_with,
##     right_join, sample_frac, sample_n, select, select_all, select_at,
##     select_if, semi_join, slice, slice_head, slice_max, slice_min,
##     slice_sample, slice_tail, summarise, summarise_all, summarise_at,
##     summarise_if, summarize, summarize_all, summarize_at, summarize_if,
##     tally, top_frac, top_n, transmute, transmute_all, transmute_at,
##     transmute_if, ungroup
## The following objects are masked from 'package:tidyr':
## 
##     drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
##     spread, uncount
## The following objects are masked from 'package:plyr':
## 
##     count, mutate, rename, summarise, summarize
## The following object is masked from 'package:stats':
## 
##     filter
if(!require(tableone)){install.packages("tableone")}
## Loading required package: tableone
if(!require(parallel)){install.packages("parallel")}
## Loading required package: parallel
if(!require(doParallel)){install.packages("doParallel")}
## Loading required package: doParallel
## Loading required package: foreach
## Loading required package: iterators
if(!require(githubinstall)){install.packages("githubinstall")}
## Loading required package: githubinstall
if(!require(gurobi)){install.packages("C:/gurobi911/win64/R/gurobi_9.1-1.zip", repos=NULL, type="source")}
## Loading required package: gurobi
## Loading required package: slam
## 
## Attaching package: 'slam'
## The following object is masked from 'package:data.table':
## 
##     rollup
if(!require(designmatch)){install.packages("designmatch")}
## Loading required package: designmatch
## Loading required package: lattice
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:tidylog':
## 
##     select
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: Rglpk
## Using the GLPK callable library version 4.47
if(!require(glpkAPI)){install.packages("glpkAPI")}
## Loading required package: glpkAPI
## using GLPK version 4.47
if(!require(foreign)){install.packages("foreign")}
## Loading required package: foreign
if(!require(Hmisc)){install.packages("Hmisc")}
## Loading required package: Hmisc
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:tidylog':
## 
##     summarize
## The following object is masked from 'package:plotly':
## 
##     subplot
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following object is masked from 'package:survey':
## 
##     deff
## The following objects are masked from 'package:plyr':
## 
##     is.discrete, summarize
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:base':
## 
##     format.pval, units
if(!require(gridExtra)){install.packages("gridExtra")}
## Loading required package: gridExtra
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
if(!require(exactRankTests)){install.packages("exactRankTests")}
## Loading required package: exactRankTests
##  Package 'exactRankTests' is no longer under development.
##  Please consider using package 'coin' instead.
if(!require(Rglpk)){install.packages("Rglpk")}
if(!require(tableone)){install.packages("tableone")}
if(!require(MatchIt)){install.packages("MatchIt")}
## Loading required package: MatchIt
if(!require(sensitivity2x2xk)){install.packages("sensitivity2x2xk")}
## Loading required package: sensitivity2x2xk
## 
## Attaching package: 'sensitivity2x2xk'
## The following object is masked from 'package:Epi':
## 
##     mh
if(!require(sensitivityfull)){install.packages("sensitivityfull")}
## Loading required package: sensitivityfull
if(!require(cobalt)){install.packages("cobalt")}
## Loading required package: cobalt
##  cobalt (Version 4.2.4, Build Date: 2020-11-05 17:30:21 UTC)
## 
## Attaching package: 'cobalt'
## The following object is masked from 'package:MatchIt':
## 
##     lalonde
if(!require(jtools)){install.packages("jtools")}
## Loading required package: jtools
## 
## Attaching package: 'jtools'
## The following object is masked from 'package:Hmisc':
## 
##     %nin%
if(!require(interactions)){install.packages("interactions")}
## Loading required package: interactions
if(!require(ggiraph)){install.packages("ggiraph")}
## Loading required package: ggiraph
if(!require(poliscidata)){install.packages("poliscidata")}
## Loading required package: poliscidata
## Registered S3 method overwritten by 'gdata':
##   method         from  
##   reorder.factor gplots
## 
## Attaching package: 'poliscidata'
## The following object is masked from 'package:jtools':
## 
##     wtd.sd
## The following objects are masked from 'package:Hmisc':
## 
##     csv.get, cut2, describe, spss.get, stata.get, wtd.mean,
##     wtd.quantile, wtd.var
## The following objects are masked from 'package:survey':
## 
##     svyboxplot, svyby, svychisq, svydesign, svyglm, svymean, svyplot,
##     svytable
## The following object is masked from 'package:plyr':
## 
##     ddply
## The following object is masked from 'package:car':
## 
##     scatterplot
## The following object is masked from 'package:psych':
## 
##     describe
if(!require(tidybayes)){install.packages("tidybayes")}
## Loading required package: tidybayes
if(!require(brms)){install.packages("brms")}
## Loading required package: brms
## Loading required package: Rcpp
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
## Loading 'brms' package (version 2.15.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## The following objects are masked from 'package:tidybayes':
## 
##     dstudent_t, pstudent_t, qstudent_t, rstudent_t
## The following object is masked from 'package:survival':
## 
##     kidney
## The following object is masked from 'package:psych':
## 
##     cs
## The following object is masked from 'package:stats':
## 
##     ar
if(!require(ROCR)){install.packages("ROCR")}
## Loading required package: ROCR
if(!require(rstanarm)){install.packages("rstanarm")}
## Loading required package: rstanarm
## This is rstanarm version 2.21.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
##   options(mc.cores = parallel::detectCores())
## 
## Attaching package: 'rstanarm'
## The following objects are masked from 'package:brms':
## 
##     dirichlet, exponential, get_y, lasso, ngrps
## The following object is masked from 'package:car':
## 
##     logit
## The following object is masked from 'package:psych':
## 
##     logit
if(!require(loo)){install.packages("loo")}
## Loading required package: loo
## This is loo version 2.4.1
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
## - Windows 10 users: loo may be very slow if 'mc.cores' is set in your .Rprofile file (see https://github.com/stan-dev/loo/issues/94).
## 
## Attaching package: 'loo'
## The following object is masked from 'package:igraph':
## 
##     compare
if(!require(emmeans)){install.packages("emmeans")}
## Loading required package: emmeans
if(!require(finalfit)){install.packages("finalfit")}
## Loading required package: finalfit
if(!require(tipr)){install.packages("tipr")}
## Loading required package: tipr
if(!require(EValue)){install.packages("EValue")}
## Loading required package: EValue
## 
## Attaching package: 'EValue'
## The following object is masked from 'package:survey':
## 
##     HR
#devtools::install_github('ChristopherLucas/MatchingFrontier')

#if(!require(radiant.update)){install.packages("radiant.update", repos = "https://radiant-rstats.github.io/minicran/")}

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#

copiar_nombres <- function(x,row.names=FALSE,col.names=TRUE,dec=",",...) {
  if(class(ungroup(x))[1]=="tbl_df"){
        if(options()$OutDec=="."){
            options(OutDec = dec)
            write.table(format(data.frame(x)),"clipboard",sep="\t",row.names=FALSE,col.names=col.names,...)
            options(OutDec = ".")
          return(x)
        } else {
            options(OutDec = ",")
            write.table(format(data.frame(x)),"clipboard",sep="\t",row.names=FALSE,col.names=col.names,...)
            options(OutDec = ",")
          return(x)    
        }
  } else {
        if(options()$OutDec=="."){
            options(OutDec = dec)
            write.table(format(x),"clipboard",sep="\t",row.names=FALSE,col.names=col.names,...)
            options(OutDec = ".")
          return(x)
        } else {
            options(OutDec = ",")
            write.table(format(x),"clipboard",sep="\t",row.names=FALSE,col.names=col.names,...)
            options(OutDec = ",")
          return(x)       
  }
 }
}  
guardar_tablas <- function (x,y) {writexl::write_xlsx(as.data.frame(x, keeprownames= T),paste0(y,".xlsx"))}

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
ajusteAFC <- function (x) {as.data.frame(cbind("Modelo"=deparse(substitute(x)),
                    "gl"=round(lavaan::fitMeasures(x) ["df"],digits=3),
                    "WLS X2"=round(lavaan::fitMeasures(x)["chisq"],digits=3),
                    "CMIN/df"=round((lavaan::fitMeasures(x)["chisq"]/fitMeasures(x) ["df"]),digits=3),
                    "aGFI"=round(lavaan::fitMeasures(x)["agfi"],digits=3),
                    "GFI"=round(lavaan::fitMeasures(x)["gfi"],digits=3),
                    "RMSEA [90% IC]"=paste0(round(lavaan::fitMeasures(x) ["rmsea"],3),"[",round(lavaan::fitMeasures(x) ["rmsea.ci.lower"],3),"-",round(lavaan::fitMeasures(x) ["rmsea.ci.upper"],3),"]"),
                    "CFit"=round(lavaan::fitMeasures(x)["rmsea.pvalue"],digits=3),
                    "CFI"=round(lavaan::fitMeasures(x)["cfi"],digits=3),
                    "NNFI"=round(lavaan::fitMeasures(x)["nnfi"],digits=3)))

  
  return(
    as.data.frame(cbind("Modelo"=deparse(substitute(x)),
                    "gl"=round(lavaan::fitMeasures(x) ["df"],digits=3),
                    "WLS X2"=sprintf("%7.3f",round(lavaan::fitMeasures(x)["chisq"],digits=3)),
                    "CMIN/df"=sprintf("%5.3f",round((lavaan::fitMeasures(x)["chisq"]/fitMeasures(x) ["df"]),digits=3)),
                    "aGFI"=sprintf("%5.3f",round(lavaan::fitMeasures(x)["agfi"],digits=3)),
                    "GFI"=sprintf("%5.3f",round(lavaan::fitMeasures(x)["gfi"],digits=3)),
                    "RMSEA [90% IC]"=paste0(sprintf("%5.3f",round(lavaan::fitMeasures(x) ["rmsea"],3)),"[",sprintf("%5.3f",round(lavaan::fitMeasures(x) ["rmsea.ci.lower"],3)),"-",sprintf("%5.3f",round(lavaan::fitMeasures(x) ["rmsea.ci.upper"],3)),"]"),
                    "CFit"=sprintf("%5.3f",round(lavaan::fitMeasures(x)["rmsea.pvalue"],digits=3)),
                    "CFI"=sprintf("%5.3f",round(lavaan::fitMeasures(x)["cfi"],digits=3)),
                    "NNFI"=sprintf("%5.3f",round(lavaan::fitMeasures(x)["nnfi"],digits=3))))
  )
}
options(mc.cores = 8) 

Model

#Identification of effect modification of the effect of Treatment on D by Q without identification of the joint effects of E and D


library(DiagrammeR) 
# Nodes
 #node [shape = box]
 # S [label = 'Matched\n(S=1)',fontsize=7]
 # C [label = 'Not censored\n(C=0)',fontsize=7]
gr1<-
DiagrammeR::grViz("
digraph causal {

# Nodes
  node [shape = plaintext]
  a [label = 'Sex\nx\nWorkplace Bullying\n(X)',fontsize=10]
  b [label = 'Unobserved\nConfounders\n(U)',fontsize=10]
  c [label = 'Risky\nDrinking\n(Y)',fontsize=10]
  d [label = 'Observed\nConfounders\n(Z)',fontsize=10]

# Edges
  edge [color = black,
        arrowhead = vee]
  rankdir = TB;
  
  b -> c 
  b -> a 
  a -> c
  d -> a


  d -> c [minlen=1]
 # a -> d [minlen=1]
  
 # a -> S #[minlen=1]
 # Z -> S #[minlen=1]
  
#  a -> C #[minlen=3]
#  Z -> C #[minlen=3]
  { rank = same; b; a; c }
# { rank = same; S; C }
  { rankdir = LR; a; d }

# Graph
  graph [overlap = true]
}")
gr1

Figure 1. Directed Acyclic Graph

#  {rank=same ; A -> B -> C -> D};
#       {rank=same ;           F -> E[dir=back]};
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3733703/
#Cohort matching on a variable associated with both outcome and censoring
#Cohort matching on a confounder. We let A denote an exposure, Y denote an outcome, and C denote a confounder and matching variable. The variable S indicates whether an individual in the source population is selected for the matched study (1: selected, 0: not selected). See Section 2-7 for details.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7064555/

##https://www.cs.ubc.ca/labs/lci/mlrg/slides/doCalc.pdf
##https://link.springer.com/article/10.1007/s10433-018-0480-5

Explore Data

Data is weighted following a poststratification by sex and region, according to the National Survey of Employment (Encuesta Nacional de Empleo) of the first three months of 2018 (variable weight). This is why we need to weight values in each variable.


library(readr)
## Warning: package 'readr' was built under R version 4.0.3
attr(BD_2020_input$weight, "label") <- 'Weight (ponderador)'
attr(BD_2020_input$zona, "label") <- 'Region of Chile (Adimark)'
attr(BD_2020_input$mujer, "label") <- 'Sex (1=Women)'
attr(BD_2020_input$hombre, "label") <- 'Sex (1=Men)'

attr(BD_2020_input$autonaq, "label") <- 'Self-labeling Bullied in the Workplace (A21)'
attr(BD_2020_input$frec_acos21_1, "label") <- 'Self-labeling Frequent WB (Daily or Weekly)'
attr(BD_2020_input$frec_acos_ecotest, "label") <- 'Psychological Harrassment Araucaria (A22)'
attr(BD_2020_input$testigo, "label") <- 'Bystander of Sexual Harrassment (A28)'
attr(BD_2020_input$agr_fisica, "label") <- 'Physical Harrassment last 12 months'
attr(BD_2020_input$acos_sex, "label") <- 'Sexual Harrassment last 12 months'
attr(BD_2020_input$acos_psico_freq, "label") <- 'Psychological Harrassment Often & Too Often (Art. Ximena y Amalia)'
attr(BD_2020_input$jerarquia_acoso, "label") <- 'Expuestos a Acoso Psicológico, Físico o Sexual, Jerarquía Agresores (1)'
attr(BD_2020_input$jerarquia_acoso2, "label") <- 'Expuestos a Acoso Psicológico, Físico o Sexual, Jerarquía Agresores (2)'
attr(BD_2020_input$a29_1, "label") <- 'Sexo Agresores'

attr(BD_2020_input$k6_2, "label") <- 'Sum of raw scores Distress K6'

attr(BD_2020_input$latdec_dic, "label") <- 'Low Decisional Latitude (Dichotomized)(JCQ)'
attr(BD_2020_input$democ_dic, "label") <- 'High Job Demands (Dichotomized)(JCQ)'
attr(BD_2020_input$aposoc_tot, "label") <- 'Low Social Support (Dichotomized)(JCQ)'
attr(BD_2020_input$desbalance_dic, "label") <- 'Effort-Reward Imbalance (Dichotomized)(ERI)'

attr(BD_2020_input$climasex, "label") <- 'Sexist Climate'

attr(BD_2020_input$depre, "label") <- 'Depressive Symptoms'
attr(BD_2020_input$psicotrop, "label") <- 'Consumo de psicotrópicos'
attr(BD_2020_input$auditc, "label") <- 'Risky drinking (Short AUDIT)'
attr(BD_2020_input$salgen, "label") <- 'Salud General (1=Bad & Very Bad)'

attr(BD_2020_input$edad, "label") <- 'Age (in groups)'
attr(BD_2020_input$superv, "label") <- 'Supervises the tasks of other workers'
attr(BD_2020_input$jef_hogar, "label") <- 'Head of Household (1=Yes)'
attr(BD_2020_input$educacion_media, "label") <- 'Had not Completed Secondary School (Art. Magdalena)'
attr(BD_2020_input$cont_def, "label") <- 'Fixed-term Contract (Art. Magdalena)'
attr(BD_2020_input$ocuprev_nocalif, "label") <- 'Not qualified Occupation (Operarios, Técnicos, Conductores y Trabajdores No-Calificados) (Art. Magdalena)'
attr(BD_2020_input$carga_dom, "label") <- 'Unpaid Care Work at Home (A16)'
attr(BD_2020_input$ing_hog_xi_am277, "label") <- 'Household Income (Art. Xi y Am)'
attr(BD_2020_input$a61, "label") <- 'Economic Narrowness'
#Por favor, indique la frase que más se acerca a la realidad de su hogar: “Pensando en el total de ingresos mensuales de su hogar, este…    
attr(BD_2020_input$a4, "label") <- 'Private/Public Job'

BD_2020_input$zona<- factor(BD_2020_input$zona,labels=c("Santiago", "Valparaíso", "Concepción"))
BD_2020_input$mujer<- factor(BD_2020_input$mujer,labels=c("Hombre", "Mujer"))

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

#BD_2020_input %>% 
#dplyr::mutate_at(.vars= vars(naq1, naq2, naq3,naq4, naq5, naq6, naq7, naq8, naq9, naq10, naq11, naq12, naq13, naq14, naq15, #naq16, naq17, naq18, naq19, naq20, naq21, naq22),.funs = ~dplyr::case_when(.))
# BD_2020_input %>% janitor::tabyl(a21,frec_acos21_1)
#table(BD_2020_input$naq21)

BD_2020_input3<-
  BD_2020_input%>%
  dplyr::mutate_at(vars(c("a22","a24","a26","a28","a29_1")),
               ~dplyr::case_when(.=="nr"~NA_character_,TRUE~as.character(.))) %>% 
    dplyr::mutate_at(vars(c("a29_1")),
               ~dplyr::case_when(.=="6"~NA_character_,TRUE~as.character(.))) %>% 
    dplyr::mutate_at(vars(c("a24","a26")),
               ~dplyr::case_when(.=="3"~NA_character_,TRUE~as.character(.))) %>% 
      dplyr::mutate_at(vars(c("a61")),
               ~dplyr::case_when(.=="5"~NA_character_,TRUE~as.character(.))) %>% 
  dplyr::mutate(hay_acoso=dplyr::case_when(a22==2|a24==2|a26==2|a28==2~1,TRUE~0)) %>% 
  dplyr::mutate(hay_acoso_naq=dplyr::case_when(a21==2~1,TRUE~0)) %>% 
  dplyr::mutate_at(vars(c("a22","a24","a26","a29_1","jerarquia_acoso2","jerarquia_acoso")),
                   ~dplyr::case_when(is.na(.) & hay_acoso==0~"Not Apply",
                                     TRUE~as.character(.))) %>% 
    dplyr::mutate_at(vars(c("frec_acos21_1")),
                   ~dplyr::case_when(is.na(.) & hay_acoso_naq==0~"Not Apply",
                                     TRUE~as.character(.))) %>% 
      dplyr::mutate(jerarquia_acoso_tot_comp= dplyr::case_when(jerarquia_acoso2=="Companeros"~1,
                                                              jerarquia_acoso=="Companeros"~1,
                                                              TRUE~0)) %>% 
    dplyr::mutate(jerarquia_acoso_tot_sup= dplyr::case_when(jerarquia_acoso2=="Superiores"~1,
                                                            jerarquia_acoso=="Superiores"~1,
                                                            TRUE~0)) %>% 
    dplyr::mutate(jerarquia_acoso_tot_sub= dplyr::case_when(jerarquia_acoso2=="Subalternos"~1,
                                                            jerarquia_acoso=="Subalternos"~1,
                                                            TRUE~0)) %>% 
  dplyr::mutate(jerarquia_acoso_tot_ext= dplyr::case_when(jerarquia_acoso2=="Externos"~1,
                                                          jerarquia_acoso=="Externos"~1,
                                                          TRUE~0))  %>% 
  dplyr::mutate(gse=dplyr::case_when(gse==1~"ABC1",gse==2~"C2",gse==3~"C3",gse==4~"D",gse==5~"E")) %>% 
  dplyr::mutate(gse=readr::parse_factor(gse,levels=c('ABC1', 'C2', 'C3', 'D','E'),
                                 ordered =T,trim_ws=T,include_na =F)) %>% 
  dplyr::mutate(a61=dplyr::case_when(a61==1~"There is enough money, being able to save",
                                     a61==2~"There is just enough money, with no greater difficulties",
                                     a61==3~"There is not enough money, with some difficulties",
                                     a61==4~"There is not enough money, with greater difficulties",
                                     a61==5~NA_character_)) %>%
  dplyr::mutate(a61=parse_factor(a61,levels=c('There is enough money, being able to save', 
                                              'There is just enough money, with no greater difficulties', 
                                              'There is not enough money, with some difficulties', 
                                              'There is not enough money, with greater difficulties'),
                                 ordered =T,trim_ws=T,include_na =F)) %>% 
  dplyr::mutate(testigo=dplyr::if_else(testigo==2,"Witnessed sexual harrasment","Did not witnessed sexual harrasment",NA_character_))
  
#El pomedio del dolar al 2018 era de 626,12 636,15  652,41. Saqué un promedio, según los promedios de los meses de aplicación
#https://www.sii.cl//valores_y_fechas/dolar/dolar2018.htm

#paste0("We calculated the mean of the dollar at 2018 in the months of application (626,12  636,15  652,41). This is the mean of the months of application")

#paste0("540 chilean pesos were equal to (USD): ", format(round(540000/mean(c(626.12,   636.15, 652.41)),0),big.mark=","))
#paste0("913 chilean pesos were equal to (USD): ", format(round(913000/mean(c(626.12,   636.15, 652.41)),0),big.mark=","))

BD_2020_input3$ing_hog_xi_am277<- factor(BD_2020_input3$ing_hog_xi_am277,labels=c("Less or equal than 540.000 pesos (846 USD)","From 540.001 to 913.000 pesos (Between 846 and 1,431 USD)","More than 913.000 pesos (1,431 USD)"))
BD_2020_input3$autonaq<- factor(BD_2020_input3$autonaq,labels=c("No","Yes"))
BD_2020_input3$frec_acos21_1<- factor(BD_2020_input3$frec_acos21_1,labels=c("No","Yes","Not Apply"))

BD_2020_input3$a22<- factor(BD_2020_input3$a22,labels=c("No","Yes","Not Apply"))
BD_2020_input3$a24<- factor(BD_2020_input3$a24,labels=c("No","Yes","Not Apply"))
BD_2020_input3$a26<- factor(BD_2020_input3$a26,labels=c("No","Yes","Not Apply"))
BD_2020_input3$a29_1<- factor(BD_2020_input3$a29_1,labels=c("Mainly men",
                                                            "Mainly women",
                                                            "Both sexes equally",
                                                            "Only men",
                                                            "Only women",
                                                            "Not Apply"))
BD_2020_input3$edad<- factor(BD_2020_input3$edad,labels=c("20 to 29",
                                                            "30 to 39",
                                                            "40 to 49",
                                                            "50 to 59",
                                                            ">=60"), ordered=T)

BD_2020_input3$jerarquia_acoso_tot_comp<- factor(BD_2020_input3$jerarquia_acoso_tot_comp,labels=c("No","Yes"))
BD_2020_input3$jerarquia_acoso_tot_sup<- factor(BD_2020_input3$jerarquia_acoso_tot_sup,labels=c("No","Yes"))
BD_2020_input3$jerarquia_acoso_tot_sub<- factor(BD_2020_input3$jerarquia_acoso_tot_sub,labels=c("No","Yes"))
BD_2020_input3$jerarquia_acoso_tot_ext<- factor(BD_2020_input3$jerarquia_acoso_tot_ext,labels=c("No","Yes"))

BD_2020_input3$salgen<- factor(BD_2020_input3$salgen,labels=c("No","Yes"))
BD_2020_input3$jerarquia_acoso_tot_ext<- factor(BD_2020_input3$jerarquia_acoso_tot_ext,labels=c("No","Yes"))
BD_2020_input3$depre<- factor(BD_2020_input3$depre,labels=c("No","Yes"))
BD_2020_input3$psicotrop<- factor(BD_2020_input3$psicotrop,labels=c("No","Yes"))

BD_2020_input3$superv<- factor(BD_2020_input3$superv,labels=c("No","Yes"))
BD_2020_input3$jef_hogar<- factor(BD_2020_input3$jef_hogar,labels=c("No","Yes"))
BD_2020_input3$educacion_media<- factor(BD_2020_input3$educacion_media,labels=c("No","Yes"))
BD_2020_input3$cont_def<- factor(BD_2020_input3$cont_def,labels=c("No","Yes"))
BD_2020_input3$ocuprev_nocalif<- factor(BD_2020_input3$ocuprev_nocalif,labels=c("No","Yes"))
BD_2020_input3$carga_dom<- factor(BD_2020_input3$carga_dom,labels=c("No","Yes"))
BD_2020_input3$org_sind<- factor(BD_2020_input3$org_sind,labels=c("No","Yes"))

BD_2020_input3$latdec_dic<- factor(BD_2020_input3$latdec_dic,labels=c("No","Yes"))
BD_2020_input3$democ_dic<- factor(BD_2020_input3$democ_dic,labels=c("No","Yes"))
BD_2020_input3$aposoc_tot<- factor(BD_2020_input3$aposoc_tot,labels=c("No","Yes"))
BD_2020_input3$desbalance_dic<- factor(BD_2020_input3$desbalance_dic,labels=c("No","Yes"))

BD_2020_input3$turnos<- factor(BD_2020_input3$turnos,labels=c("No","Yes"))
BD_2020_input3$tamano_de_la_empresa<- factor(BD_2020_input3$tamano_de_la_empresa,labels=c("1 to 10","11 to 49", "50 to 199", "200 or more"), ordered=T)
BD_2020_input3$sect_priv<- factor(BD_2020_input3$sect_priv,labels=c("Public sector","Private sector"))
BD_2020_input3$antiguedad<- factor(BD_2020_input3$antiguedad,labels=c("Six months or less","More than six months"))
BD_2020_input3$atpub<- factor(BD_2020_input3$atpub,labels=c("Job w/o customer service","Customer service job"))

BD_2020_input3$climasex<- factor(BD_2020_input3$climasex,labels=c("No","Yes"))

attr(BD_2020_input3$autonaq, "label") <- 'Self-labeling Bullied in the Workplace in the last 6 months'
attr(BD_2020_input3$frec_acos21_1, "label") <- "Self-labeling Frequent WB in the last 6 months (Daily or Weekly)"
attr(BD_2020_input3$a22, "label") <- 'Psychological Harassment in the Workplace in the last 12 months'
attr(BD_2020_input3$a24, "label") <- 'Physical Harassment in the Workplace in the last 12 months'
attr(BD_2020_input3$a26, "label") <- 'Sexual Harassment in the Workplace in the last 12 months'
attr(BD_2020_input3$a29_1, "label") <- 'Sex of the Harassers in the Workplace in the last 12 months'

attr(BD_2020_input3$jerarquia_acoso_tot_comp, "label") <- 'Harassment from Peers in the last 12 months'
attr(BD_2020_input3$jerarquia_acoso_tot_sup, "label") <- 'Harassment from Superiors in the last 12 months'
attr(BD_2020_input3$jerarquia_acoso_tot_sub, "label") <- 'Bottom-up Harassment in the last 12 months'
attr(BD_2020_input3$jerarquia_acoso_tot_ext, "label") <- 'External Harassment in the last 12 months'

attr(BD_2020_input3$latdec_dic, "label") <- 'Low Decisional Latitude (Dichotomized)(JCQ)'
attr(BD_2020_input3$democ_dic, "label") <- 'High Job Demands (Dichotomized)(JCQ)'
attr(BD_2020_input3$aposoc_tot, "label") <- 'Low Social Support (Dichotomized)(JCQ)'
attr(BD_2020_input3$desbalance_dic, "label") <- 'Effort-Reward Imbalance (Dichotomized)(ERI)'

attr(BD_2020_input3$climasex, "label") <- 'Sexist Climate'

attr(BD_2020_input3$a53, "label") <- 'Age'
attr(BD_2020_input3$depre, "label") <- 'Depressive Symptoms'
attr(BD_2020_input3$psicotrop, "label") <- 'Consumption of Psychotropics'
attr(BD_2020_input3$auditc, "label") <- 'Risky drinking (Short AUDIT)'
attr(BD_2020_input3$salgen, "label") <- 'Overall Health (1=Bad & Very Bad)'

attr(BD_2020_input3$edad, "label") <- 'Age (in groups)'
attr(BD_2020_input3$superv, "label") <- 'Supervises the tasks of other workers'
attr(BD_2020_input3$jef_hogar, "label") <- 'Head of Household (1=Yes)'
attr(BD_2020_input3$educacion_media, "label") <- 'Had not Completed Secondary School (Art. Magdalena)'
attr(BD_2020_input3$cont_def, "label") <- 'Fixed-term Contract (Art. Magdalena)'
attr(BD_2020_input3$ocuprev_nocalif, "label") <- 'Unskilled Workers (Plant and Machine Operators, Technicians, Transport Laborers & Unskilled Workers vs. Clerks, Sales Workers, Skilled Service Workers, Managers & Professionals) (Art. Magdalena)'
attr(BD_2020_input3$carga_dom, "label") <- 'Unpaid Care Work at Home (A16)'
attr(BD_2020_input3$ing_hog_xi_am277, "label") <- 'Household Income (Art. Xi y Am)'
attr(BD_2020_input3$a61, "label") <- 'Economic Narrowness'
attr(BD_2020_input3$org_sind, "label") <- 'Absence of Union in Primary Work'

attr(BD_2020_input3$turnos, "label") <- 'System of shifts'
attr(BD_2020_input3$tamano_de_la_empresa, "label") <- 'Size of the Workplace (Adimark)'
attr(BD_2020_input3$sect_priv, "label") <- 'Sector of Workplace (1=Private)'
attr(BD_2020_input3$antiguedad, "label") <- 'Seniority in Workplace'
attr(BD_2020_input3$atpub, "label") <- 'Customer service job'
attr(BD_2020_input3$naq_my_e, "label") <- 'NAQ (Mikkelsen & Einarsen)'


attr(BD_2020_input3$org_sind, "label") <- 'Absence of Union in Primary Work'
attr(BD_2020_input3$gse, "label") <- 'GSE: Socioeconomic Groups'

BD_2020_input3$auditc<- factor(BD_2020_input3$auditc,labels=c("No", "Yes"))
attr(BD_2020_input3$auditc, "label") <- 'Risky drinking (Short AUDIT)'
attr(BD_2020_input3$zona, "label") <- 'Metropolitan Region'

BD_2020_input3$a52<- factor(BD_2020_input3$a52,labels=c("Men", "Women"))
attr(BD_2020_input3$a52, "label") <- 'Sex'

dsgn_BD_naq <- survey::svydesign(ids = ~1, data = BD_2020_input3, weights = ~weight)

dsgn_mujer<- subset(dsgn_BD_naq,mujer==1)
dsgn_hombre<- subset(dsgn_BD_naq,mujer==0)

Descriptive

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#TO SEE HOW DATA BEHAVE
#data%>% dplyr::filter(sig==1)%>% dplyr::select(date)

kableone <- function(x, ...) {
  capture.output(x <- print(x,...))
  knitr::kable(x,
               format= "html", 
               caption = 'Table 1. Summary Table of Variables of Interest',
               col.names=c("Total Sample"), 
               format.args= list(decimal.mark= ".", big.mark= ","))
}
#data%>% dplyr::filter(time>0)%>% group_by(year)%>% summarise(min(as.Date(date)),max(as.Date(date)))
#data%>% dplyr::filter(post>0)%>% group_by(year)%>% summarise(min(as.Date(date)),max(as.Date(date)))
#data%>% dplyr::filter(yearday>0)%>% group_by(year)%>% summarise(min(as.Date(date)),max(as.Date(date)),n())

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#pvals <- compareGroups::getResults(table, "p.overall")

#"climasex", "latdec_dic", "aposoc_tot", "democ_dic", "desbalance_dic", "salgen", 

cat_vars<-
c("jef_hogar", "carga_dom", "educacion_media", "ocuprev_nocalif",
   "org_sind", "tamano_de_la_empresa", "sect_priv", 
  "atpub", "superv", "turnos", "antiguedad", "cont_def", 
  "gse", "a61", "a52", "edad", "zona")

tot_vars<-c(cat_vars)

tab1<-
   svyCreateTableOne(vars = c(tot_vars,"auditc","naq_my_e"), 
                     data= dsgn_BD_naq, 
                     factorVars=c(cat_vars,"auditc","naq_my_e"),
                     #strata = "autonaq", 
                     includeNA =T,
                      smd=T)

kableone(tab1,#nonnormal= c("k6_2","a53"), #nonnormal= c("edad_ini_cons","edad_al_ing"),
                       test=T, varLabels=T,noSpaces=T, printToggle=T, dropEqual=F,smd=T) %>% 
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 10) %>%
    kableExtra::row_spec(1, bold = T) %>%
    kableExtra::add_footnote(paste0("Note. Total cases (n= ",nrow(BD_2020_input3),"); NA= Missing values"), notation = "none") %>% 
    kableExtra::scroll_box(width = "100%", height = "400px")
Table 1. Summary Table of Variables of Interest
Total Sample
n 1995.9
Head of Household (1=Yes) (%)
No 562.5 (28.2)
Yes 1433.1 (71.8)
NA 0.4 (0.0)
Unpaid Care Work at Home (A16) (%)
No 470.5 (23.6)
Yes 1522.2 (76.3)
NA 3.2 (0.2)
Had not Completed Secondary School (Art. Magdalena) (%)
No 1562.4 (78.3)
Yes 427.8 (21.4)
NA 5.7 (0.3)
Unskilled Workers (Plant and Machine Operators, Technicians, Transport Laborers & Unskilled Workers vs. Clerks, Sales Workers, Skilled Service Workers, Managers & Professionals) (Art. Magdalena) (%)
No 993.4 (49.8)
Yes 998.5 (50.0)
NA 4.0 (0.2)
Absence of Union in Primary Work (%)
No 709.8 (35.6)
Yes 1235.9 (61.9)
NA 50.2 (2.5)
Size of the Workplace (Adimark) (%)
1 to 10 629.8 (31.6)
11 to 49 466.2 (23.4)
50 to 199 431.4 (21.6)
200 or more 442.1 (22.1)
NA 26.4 (1.3)
Sector of Workplace (1=Private) (%)
Public sector 310.5 (15.6)
Private sector 1660.1 (83.2)
NA 25.4 (1.3)
Customer service job (%)
Job w/o customer service 690.2 (34.6)
Customer service job 1304.9 (65.4)
NA 0.8 (0.0)
Supervises the tasks of other workers (%)
No 1358.6 (68.1)
Yes 631.8 (31.7)
NA 5.5 (0.3)
System of shifts = Yes (%) 370.3 (18.6)
Seniority in Workplace (%)
Six months or less 234.0 (11.7)
More than six months 1760.6 (88.2)
NA 1.3 (0.1)
Fixed-term Contract (Art. Magdalena) (%)
No 1403.6 (70.3)
Yes 567.3 (28.4)
NA 25.0 (1.3)
GSE: Socioeconomic Groups (%)
ABC1 83.0 (4.2)
C2 639.2 (32.0)
C3 725.0 (36.3)
D 492.0 (24.7)
E 56.7 (2.8)
Economic Narrowness (%)
There is enough money, being able to save 341.7 (17.1)
There is just enough money, with no greater difficulties 1077.9 (54.0)
There is not enough money, with some difficulties 440.1 (22.1)
There is not enough money, with greater difficulties 125.2 (6.3)
NA 11.0 (0.5)
Sex = Women (%) 804.3 (40.3)
Age (in groups) (%)
20 to 29 399.5 (20.0)
30 to 39 473.0 (23.7)
40 to 49 474.1 (23.8)
50 to 59 409.9 (20.5)
>=60 239.4 (12.0)
Metropolitan Region (%)
Santiago 1298.2 (65.0)
Valparaíso 335.1 (16.8)
Concepción 362.6 (18.2)
Risky drinking (Short AUDIT) = Yes (%) 538.5 (27.0)
NAQ (Mikkelsen & Einarsen) = 2 (%) 203.7 (10.2)
Note. Total cases (n= 1995); NA= Missing values
kableone <- function(x, ...) {
  capture.output(x <- print(x,...))
  knitr::kable(x,
               format= "html", 
               col.names=c("Men","Women","p-value","test","SMD"),
               caption = 'Table 2. Summary Tables of Variables of Interest, by Sex',
               format.args= list(decimal.mark= ".", big.mark= ","))
}

cat_vars2<-
c("jef_hogar", "carga_dom", "educacion_media", "ocuprev_nocalif",
   "org_sind", "tamano_de_la_empresa", "sect_priv", 
  "atpub", "superv", "turnos", "antiguedad", "cont_def", 
  "gse", "a61", "edad", "zona")

tab2<-
   svyCreateTableOne(vars = c(cat_vars2,"auditc","naq_my_e"), 
                     data= dsgn_BD_naq, 
                     factorVars=c(cat_vars2,"auditc","naq_my_e"),
                     strata = "a52", 
                     includeNA =F,
                      smd=T)

kableone(tab2, #nonnormal= c("k6_2","a53"),
                       test=T, varLabels=T,noSpaces=T, printToggle=T, dropEqual=F,smd=T) %>% 
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 10) %>%
    kableExtra::row_spec(1, bold = T) %>%
    kableExtra::add_footnote(paste0("Note. Total cases (n= ",nrow(BD_2020_input3),"); NA= Missing values"), notation = "none") %>% 
    kableExtra::scroll_box(width = "100%", height = "400px")
Table 2. Summary Tables of Variables of Interest, by Sex
Men Women p-value test SMD
n 1191.6 804.3
Head of Household (1=Yes) = Yes (%) 979.9 (82.2) 453.1 (56.4) <0.001 0.584
Unpaid Care Work at Home (A16) = Yes (%) 803.6 (67.6) 718.6 (89.4) <0.001 0.552
Had not Completed Secondary School (Art. Magdalena) = Yes (%) 285.5 (24.0) 142.3 (17.8) 0.003 0.154
Unskilled Workers (Plant and Machine Operators, Technicians, Transport Laborers & Unskilled Workers vs. Clerks, Sales Workers, Skilled Service Workers, Managers & Professionals) (Art. Magdalena) = Yes (%) 670.2 (56.3) 328.3 (41.0) <0.001 0.310
Absence of Union in Primary Work = Yes (%) 744.5 (64.1) 491.3 (62.7) 0.565 0.030
Size of the Workplace (Adimark) (%) 0.030 0.153
1 to 10 360.3 (30.7) 269.5 (33.9)
11 to 49 289.7 (24.7) 176.5 (22.2)
50 to 199 238.6 (20.3) 192.8 (24.3)
200 or more 286.4 (24.4) 155.7 (19.6)
Sector of Workplace (1=Private) = Private sector (%) 1037.5 (87.8) 622.5 (78.9) <0.001 0.240
Customer service job = Customer service job (%) 715.3 (60.0) 589.6 (73.4) <0.001 0.286
Supervises the tasks of other workers = Yes (%) 421.1 (35.4) 210.7 (26.3) <0.001 0.199
System of shifts = Yes (%) 273.6 (23.0) 96.7 (12.0) <0.001 0.291
Seniority in Workplace = More than six months (%) 1044.7 (87.7) 716.0 (89.1) 0.395 0.042
Fixed-term Contract (Art. Magdalena) = Yes (%) 349.5 (29.6) 217.8 (27.5) 0.350 0.047
GSE: Socioeconomic Groups (%) 0.001 0.226
ABC1 51.8 (4.3) 31.2 (3.9)
C2 333.9 (28.0) 305.3 (38.0)
C3 460.0 (38.6) 265.0 (32.9)
D 315.8 (26.5) 176.2 (21.9)
E 30.1 (2.5) 26.6 (3.3)
Economic Narrowness (%) 0.001 0.206
There is enough money, being able to save 217.7 (18.4) 124.1 (15.5)
There is just enough money, with no greater difficulties 671.9 (56.8) 406.0 (50.7)
There is not enough money, with some difficulties 234.0 (19.8) 206.1 (25.7)
There is not enough money, with greater difficulties 59.8 (5.1) 65.4 (8.2)
Age (in groups) (%) <0.001 0.261
20 to 29 251.8 (21.1) 147.7 (18.4)
30 to 39 273.1 (22.9) 199.9 (24.9)
40 to 49 252.5 (21.2) 221.5 (27.5)
50 to 59 237.5 (19.9) 172.4 (21.4)
>=60 176.7 (14.8) 62.7 (7.8)
Metropolitan Region (%) 0.518 0.045
Santiago 767.3 (64.4) 530.9 (66.0)
Valparaíso 199.7 (16.8) 135.3 (16.8)
Concepción 224.6 (18.8) 138.0 (17.2)
Risky drinking (Short AUDIT) = Yes (%) 397.8 (33.4) 140.7 (17.5) <0.001 0.371
NAQ (Mikkelsen & Einarsen) = 2 (%) 124.6 (10.5) 79.1 (9.8) 0.687 0.020
Note. Total cases (n= 1995); NA= Missing values

AUDIT

#,starts_with("p4_afront"),starts_with("p5_animo")
scale2 <- function(x, na.rm = FALSE) (x - mean(x, na.rm = na.rm)) / sd(x, na.rm)

options(knitr.kable.NA = '')

dim_audit_scale<-BD_2020_input3 %>%
  dplyr::mutate_at(.vars=vars(c("a46_top_1","a46_top_12","a46_top_13")),.funs = list(`num`=~ifelse(as.numeric(.)==3,NA,.))) %>% 
  dplyr::mutate_at(.vars=vars(c("a51_1","a51_2","a51_3",
                                "a46_top_1_num", "a46_top_12_num", "a46_top_13_num")),.funs = list(`num_audit`=~scale2(as.numeric(.),na.rm=T))) %>% 
  dplyr::select(ends_with("num_audit")) 

dim_auditc_gral<-BD_2020_input3 %>% 
  dplyr::select("a51_1","a51_2","a51_3",
                "a46_top_1","a46_top_12","a46_top_13") %>% 
  dplyr::mutate(a51_1=factor(dplyr::case_when(a51_1==1~"Nunca",a51_1==2~"Una vez al mes o menos",
                                            a51_1==3~"2 a 4 veces al mes",a51_1==4~"2 a 3 veces a la semana",
                                            a51_1==5~"4 o más veces a la semana"),
                             levels=c("Nunca","Una vez al mes o menos","2 a 4 veces al mes",
                                      "2 a 3 veces a la semana","4 o más veces a la semana"),
                             ordered=T)) %>%
  dplyr::mutate(a51_2=factor(dplyr::case_when(a51_2==1~"0 a 2 tragos",a51_2==2~"3 a 4 tragos",
                                            a51_2==3~"5 a 6 tragos",a51_2==4~"7 a 9 tragos",
                                            a51_2==5~"10 o más tragos"),
                             levels=c("0 a 2 tragos","3 a 4 tragos","5 a 6 tragos",
                                      "7 a 9 tragos","10 o más tragos"),
                             ordered=T)) %>% 
    dplyr::mutate(a51_3=factor(dplyr::case_when(a51_3==1~"Nunca",a51_3==2~"Menos de una vez al mes",
                                            a51_3==3~"Mensualmente",a51_3==4~"Semanalmente",
                                            a51_3==5~"Todos o casi todos los días"),
                             levels=c("Nunca","Menos de una vez al mes","Mensualmente",
                                      "Semanalmente","Todos o casi todos los días"),
                             ordered=T)) %>% 
      dplyr::mutate(a46_top_1=factor(dplyr::case_when(a46_top_1=="2"~"Yes",
                                                      a46_top_1=="1"~"No",
                                                      T~NA_character_),
                             levels=c("No","Yes"))) %>%
        dplyr::mutate(a46_top_12=factor(dplyr::case_when(a46_top_12=="2"~"Yes",
                                                      a46_top_12=="1"~"No",
                                                      T~NA_character_),
                             levels=c("No","Yes"))) %>%
        dplyr::mutate(a46_top_13=factor(dplyr::case_when(a46_top_13=="2"~"Yes",
                                                      a46_top_13=="1"~"No",
                                                      T~NA_character_),
                             levels=c("No","Yes")))

dim_auditc_num<- BD_2020_input3 %>% 
  dplyr::mutate_at(.vars=vars(c("a46_top_1","a46_top_12","a46_top_13")),.funs = ~ifelse(as.numeric(.)==3,NA,.)) %>% 
  dplyr::mutate_at(.vars=vars(c("a46_top_1","a46_top_12","a46_top_13")),.funs = list(`num_audit`=~if_else(.==2,1,0,NA_real_))) %>%
  dplyr::mutate_at(.vars=vars(c("a51_1","a51_2","a51_3")),.funs = list(`num_audit`=~as.numeric(.))) %>%
    
  dplyr::select(ends_with("num_audit")) %>% 
  na.omit() %>% 
  data.frame()

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
MVN::mvn(dim_auditc_num[,c("a51_1_num_audit", "a51_2_num_audit", "a51_3_num_audit")], subset = NULL, mvnTest = "mardia", covariance = TRUE, tol = 1e-25, alpha = 0.5,scale = FALSE, desc = TRUE, transform = "none", R = 1000,univariateTest = "Lillie",univariatePlot = "none", multivariatePlot = "none",multivariateOutlierMethod = "none", bc = FALSE, bcType = "rounded",showOutliers = FALSE, showNewData = FALSE)$multivariateNormality%>%
  data.frame()%>%
  dplyr::mutate(across(c("Statistic","p.value"),~ifelse(grepl("NA",.),NA_real_,as.numeric(as.character(.)))))%>%
  dplyr::mutate(Statistic=sprintf("%8.2f",Statistic))%>%
  dplyr::mutate(p.value=sprintf("%5.3f",p.value))%>%
  dplyr::mutate(across(c("Statistic","p.value"),~ifelse(grepl("NA",.),"-",as.character(.))))%>%
   knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 3a1. Multivariate Normality, AUDIT"),
               col.names = c("Test","Statistic","P value","Normality"),
align =c('l',rep('c', 101)))%>%
    kableExtra::add_footnote(c("Note. Mardia Skewness= Mardia Multivariable Skewness(1970); Mardia Kurtosis= Mardia Kurtosis (1970)"), notation = "none")%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 10)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## sROC 0.1-2 loaded
Table 3a1. Multivariate Normality, AUDIT
Test Statistic P value Normality
Mardia Skewness 2906.18 0.000 NO
Mardia Kurtosis 50.61 0.000 NO
MVN
NO
Note. Mardia Skewness= Mardia Multivariable Skewness(1970); Mardia Kurtosis= Mardia Kurtosis (1970)
MVN::mvn(dim_auditc_num[,c("a46_top_1_num_audit", "a46_top_12_num_audit", "a46_top_13_num_audit")], subset = NULL, mvnTest = "mardia", covariance = TRUE, tol = 1e-25, alpha = 0.5,scale = FALSE, desc = TRUE, transform = "none", R = 1000,univariateTest = "Lillie",univariatePlot = "none", multivariatePlot = "none",multivariateOutlierMethod = "none", bc = FALSE, bcType = "rounded",showOutliers = FALSE, showNewData = FALSE)$multivariateNormality%>%
  data.frame()%>%
  dplyr::mutate(across(c("Statistic","p.value"),~ifelse(grepl("NA",.),NA_real_,as.numeric(as.character(.)))))%>%
  dplyr::mutate(Statistic=sprintf("%8.2f",Statistic))%>%
  dplyr::mutate(p.value=sprintf("%5.3f",p.value))%>%
  dplyr::mutate(across(c("Statistic","p.value"),~ifelse(grepl("NA",.),"-",as.character(.))))%>%
   knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 3a2. Multivariate Normality, Psychotropic medication"),
               col.names = c("Test","Statistic","P value","Normality"),
align =c('l',rep('c', 101)))%>%
    kableExtra::add_footnote(c("Note. Mardia Skewness= Mardia Multivariable Skewness(1970); Mardia Kurtosis= Mardia Kurtosis (1970)"), notation = "none")%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 10)
Table 3a2. Multivariate Normality, Psychotropic medication
Test Statistic P value Normality
Mardia Skewness 9877.41 0.000 NO
Mardia Kurtosis 205.00 0.000 NO
MVN
NO
Note. Mardia Skewness= Mardia Multivariable Skewness(1970); Mardia Kurtosis= Mardia Kurtosis (1970)
cbind(Models=c("All", "Short AUDIT","Psychotropic medication"),
      KMO=c(round(psych::KMO(na.omit(dim_auditc_num))$MSA,2),
            round(psych::KMO(dim_auditc_num[,c("a51_1_num_audit", "a51_2_num_audit", "a51_3_num_audit")])$MSA,2),
            round(psych::KMO(dim_auditc_num[,c("a46_top_1_num_audit", "a46_top_12_num_audit", "a46_top_13_num_audit")])$MSA,2))
) %>% 
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
                 caption = paste0("Table 3b. Keyser-Meyer-Olkin"),
                 align =c('l',rep('c', 101)))%>%
    #kableExtra::add_footnote(c("Nota. Los ítems son")), notation = "none")%>%
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 10)  
Table 3b. Keyser-Meyer-Olkin
Models KMO
All 0.7
Short AUDIT 0.71
Psychotropic medication 0.71
hetcor_mat_auditc<-polycor::hetcor(dim_auditc_gral, ML = T, std.err = T, use="complete.obs", bins=4, pd=TRUE)#
#misty::poly.cor(na.omit(dim_auditc_gral), smooth = TRUE, global = TRUE, weight = BD_2020_input3$weight, correct = 0, progress = TRUE,
#         na.rm = TRUE, delete = TRUE, tri = c("both", "lower", "upper"),
#         digits = 2, as.na = NULL, check = TRUE, output = TRUE)

hetcorauditc_df<-
reshape2::melt(tibble::as_tibble(hetcor_mat_auditc$correlations,rownames = "rowname")) %>% 
    dplyr::left_join(melt(tibble::as_tibble(hetcor_mat_auditc$tests,rownames = "rowname")),by=c("rowname","variable")) %>% 
  dplyr::rename("Var1"="rowname", "Var2"="variable", "corr"="value.x", "pval"="value.y") %>% 
  dplyr::mutate(Var1_lab=dplyr::case_when(Var1=="a51_1"~"A continuación, quisiera hacerle algunas preguntas respecto a consumo de alcohol. ¿Qué tan seguido toma usted alguna bebida alcohólica?",
                                          Var1=="a51_2"~"¿Cuántos tragos suele tomar usted en un día típico de consumo de alcohol?",
                                          Var1=="a51_3"~"¿Qué tan seguido toma usted 5 o más tragos en una sola ocasión?",
                                          Var1=="a46_top_1"~"¿Disminuir la ansiedad o el nerviosismo, tales como ansiolíticos?",
                                          Var1=="a46_top_12"~"¿Ayudar a dormir, tales como hipnóticos?",
                                          Var1=="a46_top_13"~"¿Remontar el ánimo tales como antidepresivos?")) %>% 
  dplyr::mutate(Var2_lab=dplyr::case_when(Var2=="a51_1"~"A continuación, quisiera hacerle algunas preguntas respecto a consumo de alcohol. ¿Qué tan seguido toma usted alguna bebida alcohólica?",
                                          Var2=="a51_2"~"¿Cuántos tragos suele tomar usted en un día típico de consumo de alcohol?",
                                          Var2=="a51_3"~"¿Qué tan seguido toma usted 5 o más tragos en una sola ocasión?",
                                          Var2=="a46_top_1"~"¿Disminuir la ansiedad o el nerviosismo, tales como ansiolíticos?",
                                          Var2=="a46_top_12"~"¿Ayudar a dormir, tales como hipnóticos?",
                                          Var2=="a46_top_13"~"¿Remontar el ánimo tales como antidepresivos?"))
## Using rowname as id variables
## Using rowname as id variables
pd2 <- hetcorauditc_df %>% 
  dplyr::mutate(data_id = paste0(Var1, '-', Var2),
         tooltip = paste0('Y: ',Var1_lab, '<br>', 'X: ',Var2_lab, '<br>Rho: ', round(as.numeric(corr), 2), 
                          '<br>p', ifelse(pval<.001,"<0.001",paste0("=",sprintf("%.4f",pval)))))

p2 <- ggplot(pd2) +
  geom_tile_interactive(aes(Var2,Var1, fill = corr,
                            tooltip = tooltip
                            ), color = "gray")  +
  scale_fill_gradient2(low = "#E46726", high = "#6D9EC1",
                       mid = "white", midpoint = 0, limit = c(-1, 1), space = "Lab",
                       name = "Corr") +
  geom_text(mapping = aes(x = Var1, y = Var2, label = round(corr, 2)), size = 2) +
  ggplot2::coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_blank())+
  theme(axis.text.y = element_blank())+
  guides(fill = FALSE) +
  xlab("") + ylab("")
girafe(ggobj = p2)

Figure 2a. Polychoric correlation matix of AUDIT Short

dim_auditc_num2<- BD_2020_input3 %>% 
  dplyr::mutate_at(.vars=vars(c("a46_top_1","a46_top_12","a46_top_13")),.funs = ~ifelse(as.numeric(.)==3,NA,.)) %>% 
  dplyr::mutate_at(.vars=vars(c("a46_top_1","a46_top_12","a46_top_13")),.funs = list(`num_audit`=~if_else(.==2,1,0,NA_real_))) %>%
  dplyr::mutate_at(.vars=vars(c("a51_1","a51_2","a51_3")),.funs = list(`num_audit`=~as.numeric(.))) %>%
  dplyr::select(contains("num_audit"),weight) %>% 
  na.omit()

auditc_policor_wgt <- psych::cor.ci(dim_auditc_num2[,c("a46_top_1_num_audit","a46_top_12_num_audit","a46_top_13_num_audit","a51_1_num_audit","a51_2_num_audit","a51_3_num_audit")],
                  global=T,ML=T, std.err=FALSE,weight=dim_auditc_num2$weight, poly=T,
                  correct=.5,progress=T,na.rm=T, delete=T)
hetcorauditc_df2<-
reshape2::melt(tibble::as_tibble(auditc_policor_wgt$rho,rownames = "rowname")) %>% 
    dplyr::mutate(comb1=dplyr::case_when(rowname=="a46_top_1_num_audit"~"a46__1_",
                                        rowname=="a46_top_12_num_audit"~"a46__12",
                                        rowname=="a46_top_13_num_audit"~"a46__13",
                                        rowname=="a51_1_num_audit"~"a51_1",
                                        rowname=="a51_2_num_audit"~"a51_2",
                                        rowname=="a51_3_num_audit"~"a51_3")) %>% 
  dplyr::mutate(comb2=dplyr::case_when(variable=="a46_top_1_num_audit"~"a46__1_",
                                        variable=="a46_top_12_num_audit"~"a46__12",
                                        variable=="a46_top_13_num_audit"~"a46__13",
                                        variable=="a51_1_num_audit"~"a51_1",
                                        variable=="a51_2_num_audit"~"a51_2",
                                        variable=="a51_3_num_audit"~"a51_3")) %>% 
  dplyr::mutate(comb3=paste0(comb1,"-",comb2)) %>% 
  dplyr::mutate(comb4=paste0(comb2,"-",comb1)) %>% 
  dplyr::left_join(data.table::data.table(round(auditc_policor_wgt$ci[,c("lower","upper")],2),keep.rownames = T),by=c("comb3"="rn")) %>% 
    dplyr::left_join(data.table::data.table(round(auditc_policor_wgt$ci[,c("lower","upper")],2),keep.rownames = T),by=c("comb4"="rn")) %>%
  dplyr::mutate(lo=ifelse(is.na(lower.y),lower.x,lower.y)) %>% 
  dplyr::mutate(up=ifelse(is.na(upper.y),upper.x,upper.y)) %>% 
  dplyr::select(-lower.x, -upper.x, -lower.y, -upper.y) %>% 
  dplyr::mutate(Var1_lab=factor(dplyr::case_when(grepl("a51_1",rowname)~"1.¿Qué tan seguido toma usted alguna bebida alcohólica?",
                                          grepl("a51_2",rowname)~"2.¿Cuántos tragos suele tomar usted en un día típico de consumo de alcohol?",
                                          grepl("a51_3",rowname)~"3.¿Qué tan seguido toma usted 5 o más tragos en una sola ocasión?",
                                          grepl("a46_top_1$",rowname)~"4.¿Disminuir la ansiedad o el nerviosismo, tales como ansiolíticos?",
                                          grepl("a46_top_12",rowname)~"5.¿Ayudar a dormir, tales como hipnóticos?",
                                          grepl("a46_top_13",rowname)~"6.¿Remontar el ánimo tales como antidepresivos?")))%>% 
  dplyr::mutate(Var2_lab=factor(dplyr::case_when(grepl("a51_1",variable)~"1.¿Qué tan seguido toma usted alguna bebida alcohólica?",
                                          grepl("a51_2",variable)~"2.¿Cuántos tragos suele tomar usted en un día típico de consumo de alcohol?",
                                          grepl("a51_3",variable)~"3.¿Qué tan seguido toma usted 5 o más tragos en una sola ocasión?",
                                          grepl("a46_top_1$",variable)~"4.¿Disminuir la ansiedad o el nerviosismo, tales como ansiolíticos?",
                                          grepl("a46_top_12",variable)~"5.¿Ayudar a dormir, tales como hipnóticos?",
                                          grepl("a46_top_13",variable)~"6.¿Remontar el ánimo tales como antidepresivos?")))%>% 
  dplyr::select(-rowname, -variable) %>% 
  dplyr::mutate(comb1=factor(comb1,levels=c("a46__1_", "a46__12", "a46__13","a51_1","a51_2","a51_3"))) %>% 
  dplyr::mutate(comb2=factor(comb2,levels=c("a46__1_", "a46__12", "a46__13","a51_1","a51_2","a51_3"))) %>% 
  dplyr::arrange(comb1,comb2)
## Using rowname as id variables
pd3 <- hetcorauditc_df2 %>% 
  dplyr::mutate(tooltip = paste0('Y: ',Var1_lab, '<br>', 'X: ',Var2_lab, '<br>Rho: ', sprintf("%1.2f",as.numeric(value)), 
                          ' [',sprintf("%1.2f",lo),",",sprintf("%1.2f",up),"]"))

p3 <- ggplot(pd3) +
  geom_tile_interactive(aes(Var1_lab,Var2_lab, fill = value,
                            tooltip = tooltip
                            ), color = "gray")  +
  scale_fill_gradient2(low = "#E46726", high = "#6D9EC1",
                       mid = "white", midpoint = 0, limit = c(-1, 1), space = "Lab",
                       name = "Corr") +
 geom_text(mapping = aes(x = Var1_lab, y = Var2_lab, label = sprintf("%1.2f",value)), size = 2) +
  ggplot2::coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_blank())+
  theme(axis.text.y = element_blank())+
  guides(fill = FALSE) +
  xlab("") + ylab("")
girafe(ggobj = p3)

Figure 2b. Polychoric correlation matix of AUDIT Short (Survey Weighted)

alfa_dim_dim_auditc_gral1<-psych::alpha(dim_auditc_num[,c("a51_1_num_audit", "a51_2_num_audit", "a51_3_num_audit")],
                                        use="complete.obs",n.obs=nrow(dim_auditc_num),check.keys=T)

data.table(cbind.data.frame(var_lab=c("A continuación, quisiera hacerle algunas preguntas respecto a consumo de alcohol. ¿Qué tan seguido toma usted alguna bebida alcohólica?",
"¿Cuántos tragos suele tomar usted en un día típico de consumo de alcohol?",
"¿Qué tan seguido toma usted 5 o más tragos en una sola ocasión?"),alfa_dim_dim_auditc_gral1$alpha.drop),
           keep.rownames = T)%>%
    dplyr::mutate(var_lab=dplyr::case_when(grepl("-$",rn)~paste0(var_lab," (REVERTIDO)"),
                                                     T~var_lab)) %>% 
    dplyr::select(var_lab, raw_alpha, std.alpha,`G6(smc)`)%>%
    dplyr::mutate_at(vars(raw_alpha, std.alpha,`G6(smc)`),~round(.,2)) %>% 
   knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 3c1. Cronbach's Alpha if item is deleted (AUDIT)",paste0("(alfa= ",round(alfa_dim_dim_auditc_gral1$total$std.alpha,2),", SMC= ",round(alfa_dim_dim_auditc_gral1$total$`G6(smc`,2),")")),
               col.names = c("Item","Raw Alpha","Std. Alpha","SMC"),
align =c('l',rep('c', 101)))%>%
    kableExtra::add_footnote(c("Note. SMC= Multiple Squared Correlations"), notation = "none")%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 12)
Table 3c1. Cronbach’s Alpha if item is deleted (AUDIT)(alfa= 0.82, SMC= 0.75)
Item Raw Alpha Std. Alpha SMC
A continuación, quisiera hacerle algunas preguntas respecto a consumo de alcohol. ¿Qué tan seguido toma usted alguna bebida alcohólica? 0.79 0.80 0.67
¿Cuántos tragos suele tomar usted en un día típico de consumo de alcohol? 0.73 0.73 0.58
¿Qué tan seguido toma usted 5 o más tragos en una sola ocasión? 0.69 0.71 0.56
Note. SMC= Multiple Squared Correlations
alfa_dim_dim_auditc_gral2<-psych::alpha(dim_auditc_num[,c("a46_top_1_num_audit", "a46_top_12_num_audit", "a46_top_13_num_audit")],
                                        use="complete.obs",n.obs=nrow(dim_auditc_num),check.keys=T)

data.table(cbind.data.frame(var_lab=c("¿Disminuir la ansiedad o el nerviosismo, tales como ansiolíticos?",
"¿Ayudar a dormir, tales como hipnóticos?",
"¿Remontar el ánimo tales como antidepresivos?"),alfa_dim_dim_auditc_gral2$alpha.drop),
           keep.rownames = T)%>%
    dplyr::mutate(var_lab=dplyr::case_when(grepl("-$",rn)~paste0(var_lab," (REVERTIDO)"),
                                                     T~var_lab)) %>% 
    dplyr::select(var_lab, raw_alpha, std.alpha,`G6(smc)`)%>%
    dplyr::mutate_at(vars(raw_alpha, std.alpha,`G6(smc)`),~round(.,2)) %>% 
   knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 3c2. Cronbach's Alpha if item is deleted (Psy Med)",paste0("(alfa= ",round(alfa_dim_dim_auditc_gral2$total$std.alpha,2),", SMC= ",round(alfa_dim_dim_auditc_gral2$total$`G6(smc`,2),")")),
               col.names = c("Item","Raw Alpha","Std. Alpha","SMC"),
align =c('l',rep('c', 101)))%>%
    kableExtra::add_footnote(c("Note. SMC= Multiple Squared Correlations"), notation = "none")%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 12)
Table 3c2. Cronbach’s Alpha if item is deleted (Psy Med)(alfa= 0.8, SMC= 0.72)
Item Raw Alpha Std. Alpha SMC
¿Disminuir la ansiedad o el nerviosismo, tales como ansiolíticos? 0.71 0.72 0.56
¿Ayudar a dormir, tales como hipnóticos? 0.71 0.72 0.56
¿Remontar el ánimo tales como antidepresivos? 0.72 0.72 0.57
Note. SMC= Multiple Squared Correlations
rbind(
cbind.data.frame(Models="All",data.frame(t(psych::unidim(dim_auditc_num,cor= "poly")$uni))),
cbind.data.frame(Models="Psychotropic Medication",data.frame(t(psych::unidim(dim_auditc_num[,c("a46_top_1_num_audit", "a46_top_12_num_audit", "a46_top_13_num_audit")],cor= "poly")$uni))),
cbind.data.frame(Models="AUDIT Short",data.frame(t(psych::unidim(dim_auditc_num[,c("a51_1_num_audit", "a51_2_num_audit", "a51_3_num_audit")],cor= "poly")$uni))))%>% 
  dplyr::select(`Models`,`u`,`Unidim.A`,`alpha`,`av.r`,`median.r`) %>% 
  dplyr::mutate_if(is.numeric, round, 2) %>% 
   knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 3d. Unidimensionality, AUDIT Short"),
               col.names = c("Models","Raw Unidimensionality","Adjusted Unidimensionality","Std. Alpha","Mean of Intraclass correlation coefficients (ICC)", "Median of Intraclass correlation coefficients (ICC)"),
align =c('l',rep('c', 101)))%>%
    #kableExtra::add_footnote(c("Nota. Los ítems son")), notation = "none")%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 13)  
Table 3d. Unidimensionality, AUDIT Short
Models Raw Unidimensionality Adjusted Unidimensionality Std. Alpha Mean of Intraclass correlation coefficients (ICC) Median of Intraclass correlation coefficients (ICC)
All 0.27 0.75 0.76 0.35 0.10
Psychotropic Medication 1.00 1.00 0.95 0.86 0.86
AUDIT Short 1.00 1.00 0.90 0.75 0.73
##Koo and Li (2016) gives the following suggestion for interpreting ICC (Koo and Li 2016):
#below 0.50: poor
#between 0.50 and 0.75: moderate
#between 0.75 and 0.90: good
#above 0.90: excellent

We used the survey-weighted polychoric correlations to do the parallel analysis.

#Parallel analysis compares factor and principal components solutions to the real data as well as resampled data
poly_fa<-
fa.parallel(auditc_policor_wgt$rho, fm="pa", fa="both", n.obs=nrow(dim_auditc_num), nfactors=3, error.bars=F,n.iter=500,SMC=T,quant=.95,
            sim=T,show.legend=FALSE, main="Scree plot with parallel analysis",cor="poly")
Figure 2c. Parallel Analysis of AUDIT-c

Figure 2c. Parallel Analysis of AUDIT-c

## Parallel analysis suggests that the number of factors =  3  and the number of components =  2
poly_fa
## Call: fa.parallel(x = auditc_policor_wgt$rho, n.obs = nrow(dim_auditc_num), 
##     fm = "pa", fa = "both", nfactors = 3, main = "Scree plot with parallel analysis", 
##     n.iter = 500, error.bars = F, SMC = T, show.legend = FALSE, 
##     sim = T, quant = 0.95, cor = "poly")
## Parallel analysis suggests that the number of factors =  3  and the number of components =  2 
## 
##  Eigen Values of 
## 
##  eigen values of factors
## [1]  2.61  2.17  0.07 -0.02 -0.06 -0.12
## 
##  eigen values of simulated factors
## [1]  0.08  0.04  0.01 -0.01 -0.04 -0.07
## 
##  eigen values of components 
## [1] 2.79 2.43 0.31 0.25 0.14 0.07
## 
##  eigen values of simulated components
## [1] 1.07 1.04 1.01 0.99 0.96 0.93
library(psych)
library(GPArotation)
## 
## Attaching package: 'GPArotation'
## The following object is masked from 'package:entropy':
## 
##     entropy
library(dplyr)
library(plyr)
library(knitr)

efas <- list()

for (i in 1:10) {
    fitn <- psych::fa(auditc_policor_wgt$rho,
                      nfactors= i, 
                      cor= "poly", 
                      fm= "pa", 
                      use="complete.obs", 
                      n.obs= nrow(dim_auditc_num),
                      #n.iter=500, 
                      rotate="oblimin")
    efas[[i]] <- data.frame(fitn$TLI, fitn$RMSEA[1], fitn$RMSEA[2], fitn$RMSEA[3], fitn$rms, fitn$BIC) %>% 
        mutate(Factors = i,
               RMSEA_ci = paste0(round(fitn.RMSEA.2.,2),", ",round(fitn.RMSEA.3.,2))) %>% 
        dplyr::rename(TLI = fitn.TLI,
                      RMSEA = fitn.RMSEA.1.,
                      "RMSEA CI90%" = RMSEA_ci,
                      SRMR = fitn.rms, 
                      BIC = fitn.BIC) %>% 
        dplyr::select(Factors, TLI, RMSEA, "RMSEA CI90%", SRMR, BIC)
}
## mutate: new variable 'Factors' (integer) with one unique value and 0% NA
##         new variable 'RMSEA_ci' (character) with one unique value and 0% NA
## mutate: new variable 'Factors' (integer) with one unique value and 0% NA
##         new variable 'RMSEA_ci' (character) with one unique value and 0% NA
## Error in data.frame(fitn$TLI, fitn$RMSEA[1], fitn$RMSEA[2], fitn$RMSEA[3], : arguments imply differing number of rows: 1, 0
do.call("rbind", efas)  %>%
    data.table::data.table(keep.rownames = F) %>% 
    dplyr::mutate_at(c(2,3,5),~round(as.numeric(.),2)) %>% 
    dplyr::mutate_at(c(6),~round(as.numeric(.),0)) %>% 
    knitr::kable(format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
                 caption = paste0("Tabla 3e. Fit indices, Exploratory Factor Analysis AUDIT and Psy Medication"),
                 #col.names = c("Prueba","Estadistico","Valor p","Normalidad"),
                 align =c('l',rep('c', 101)))%>%
    kableExtra::add_footnote(c("Nota. TLI= Tucker-Lewis Index; RMSEA= Root Mean Square Error of Approximation; SRMR= Standardized Root Mean Residual; BIC= Bayesian Information Criterion"), notation = "none")%>%
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 12)
Tabla 3e. Fit indices, Exploratory Factor Analysis AUDIT and Psy Medication
Factors TLI RMSEA RMSEA CI90% SRMR BIC
1 0.24 0.52 0.5, 0.53 0.34 4,700
2 0.71 0.32 0.3, 0.34 0.03 794
Nota. TLI= Tucker-Lewis Index; RMSEA= Root Mean Square Error of Approximation; SRMR= Standardized Root Mean Residual; BIC= Bayesian Information Criterion


mod_2f <-
    psych::fa(auditc_policor_wgt$rho,
              nfactors= 2, 
              cor= "poly", 
              fm= "pa", 
              use="complete.obs", 
              n.obs= nrow(dim_auditc_num),
              #n.iter=500, 
              rotate="oblimin")

#En el caso de las rotaciones oblicuas, se parte del supuesto de correlación entre los nuevos factores, que en la vida real es el escenario más común, lo que conduce a que las ponderaciones calculadas no coincidan con las correlaciones entre el factor y la variable. Dentro de los métodos de rotación oblicua más utilizados se encuentran el oblimin y el promax. La rotación oblimin permite establecer relaciones jerárquicas entre los factores, para lo cual debe establecer el grado de inclinación (δ) entre ellos. Un valor δ de cero da las rotaciones más oblicuas (3,5).

#En cuanto a la rotación promax, modifica los resultados de una rotación ortogonal hasta crear una solución con cargas factoriales lo más próximas posible a la "estructura ideal". Para ello, eleva las cargas factoriales obtenidas en una rotación ortogonal a una determinada potencia (conocida como κ). En general, los valores de κ se encuentran entre 2 y 4, pero, a mayor potencia, mayor oblicuidad en la solución (el valor de κ más común es de 4) (3,5).

# Méndez Martínez, Carolina, & Rondón Sepúlveda, Martín Alonso. (2012). Introducción al análisis factorial exploratorio. Revista Colombiana de Psiquiatría, 41(1), 197-207. Retrieved February 24, 2021, from http://www.scielo.org.co/scielo.php?script=sci_arttext&pid=S0034-74502012000100014&lng=en&tlng=es.

#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# The Root Mean Square Error of Approximation (RMSEA) provides information as to how well the model, with unknown but optimally chosen parameter estimates, would fit the population covariance matrix (Byrne, 1998).
# One of its key advantages is that the RMSEA calculates confidence intervals around its value.
# Values below .060 indicate close fit (Hu & Bentler, 1999). Values up to .080 are commonly accepted as adequate.
# The Standardized Root Mean Residual (SRMR) is the square root of the difference between the residuals of the sample covariance matrix and the hypothesized covariance model.
# As SRMR is standardized, its values range between 0 and 1. Commonly, models with values below .05 threshold are considered to indicate good fit (Byrne, 1998). Also, values up to .08 are acceptable (Hu & Bentler, 1999).
# TLI’s values may fall below zero or be above one (Hair et al., 2013). For CFI and TLI values above .95 are indicative of good fit (Hu & Bentler, 1999). In practice, CFI and TLI values from .90 to .95 are considered acceptable.


#Hair, R. D., Black, W. C., Babin, B. J., Anderson, R. E., & Tatham, R. L. (2013). Multivariate data analysis. Englewood Cliffs, NJ: Prentice–Hall.

#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Situaciones en que las saturaciones sean bajas (menor de .40) y el número de ítems por factor también sea bajo (3 ítems) requieren tamaños muestrales mayores para tener alguna garantía de generalización de los resultados. Como es habitual utilizar muestras de conveniencia hay que tener en cuenta dos problemas: la no-representatividad y la atenuación por restricción de rango.
# Lloret-Segura, Susana, Ferreres-Traver, Adoración, Hernández-Baeza, Ana, & Tomás-Marco, Inés. (2014). El Análisis Factorial Exploratorio de los Ítems: una guía práctica, revisada y actualizada. Anales de Psicología, 30(3), 1151-1169. https://dx.doi.org/10.6018/analesps.30.3.199361

mod_2fa <-
    data.table::data.table(matrix(as.numeric(loadings(mod_2f)), attributes(loadings(mod_2f))$dim, dimnames=attributes(loadings(mod_2f))$dimnames),keep.rownames = T)%>%
    dplyr::mutate(label=factor(dplyr::case_when(grepl("a51_1",rn)~"1.¿Qué tan seguido toma usted alguna bebida alcohólica?",
    grepl("a51_2",rn)~"2.¿Cuántos tragos suele tomar usted en un día típico de consumo de alcohol?",
    grepl("a51_3",rn)~"3.¿Qué tan seguido toma usted 5 o más tragos en una sola ocasión?",
    grepl("a46_top_12",rn)~"5.¿Ayudar a dormir, tales como hipnóticos?",
    grepl("a46_top_13",rn)~"6.¿Remontar el ánimo tales como antidepresivos?",
    grepl("a46_top_1",rn)~"4.¿Disminuir la ansiedad o el nerviosismo, tales como ansiolíticos?")))%>% 
  dplyr::select(label, paste0("PA",1:2)) %>% 
  dplyr::arrange(label) %>% 
    dplyr::mutate_at(1:dim(loadings(mod_2f))[2]+1, ~round(as.numeric(.),2)) %>% 
    dplyr::mutate_at(1:dim(loadings(mod_2f))[2]+1, ~ifelse(.<.3,"-",sprintf("%2.2f",.))) %>% 
  data.frame()

colnames(mod_2fa)<- c("Variables",paste0( paste0("PA",1:attributes(loadings(mod_2f))$dim[2]),"\nR2=",round(mod_2f$R2,2)))

mod_2fa %>% 
    knitr::kable(format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
                 caption = paste0("Tabla 3f. Factor Loadings, questions of AUDIT and psychotropics medication"),
                 #col.names = c("Prueba","Estadistico","Valor p","Normalidad"),
                 align =c('l',rep('c', 101)))%>%
    kableExtra::add_footnote(c("Nota. Se omiten cargas factoriales menores a .3"), notation = "none")%>%  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 13)
Tabla 3f. Factor Loadings, questions of AUDIT and psychotropics medication
Variables PA1 R2=0.95 PA2 R2=0.91
1.¿Qué tan seguido toma usted alguna bebida alcohólica?
0.81
2.¿Cuántos tragos suele tomar usted en un día típico de consumo de alcohol?
0.89
3.¿Qué tan seguido toma usted 5 o más tragos en una sola ocasión?
0.89
4.¿Disminuir la ansiedad o el nerviosismo, tales como ansiolíticos? 0.93
5.¿Ayudar a dormir, tales como hipnóticos? 0.92
6.¿Remontar el ánimo tales como antidepresivos? 0.93
Nota. Se omiten cargas factoriales menores a .3

AFC

#Dado que es un modelo que pordría tener problemas de identificación debido a la cantidad de parámetros, introduciremos a la estimación el método de estanndarización de la varianza, en el que fija la varianza del factor a 1 pero estima libremente todas las cargas factoriales como se señala [en este enlace](https://stats.idre.ucla.edu/r/seminars/rcfa/).

library(lavaan)  
## Warning: package 'lavaan' was built under R version 4.0.3
## This is lavaan 0.6-7
## lavaan is BETA software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
## 
##     cor2cov
library(semPlot)
## Warning: package 'semPlot' was built under R version 4.0.3
## Registered S3 methods overwritten by 'huge':
##   method    from   
##   plot.sim  BDgraph
##   print.sim BDgraph
library(semTools)
## Warning: package 'semTools' was built under R version 4.0.3
## 
## ###############################################################################
## This is semTools 0.5-4
## All users of R (or SEM) are invited to submit functions or ideas for functions.
## ###############################################################################
## 
## Attaching package: 'semTools'
## The following object is masked from 'package:readr':
## 
##     clipboard
## The following object is masked from 'package:psych':
## 
##     skew
  # Latent variables

dim_auditc_gral2<-BD_2020_input3 %>% 
  dplyr::select("a51_1","a51_2","a51_3",
                "a46_top_1","a46_top_12","a46_top_13","weight") %>% 
  dplyr::mutate(a51_1=factor(dplyr::case_when(a51_1==1~"Nunca",a51_1==2~"Una vez al mes o menos",
                                            a51_1==3~"2 a 4 veces al mes",a51_1==4~"2 a 3 veces a la semana",
                                            a51_1==5~"4 o más veces a la semana"),
                             levels=c("Nunca","Una vez al mes o menos","2 a 4 veces al mes",
                                      "2 a 3 veces a la semana","4 o más veces a la semana"),
                             ordered=T)) %>%
  dplyr::mutate(a51_2=factor(dplyr::case_when(a51_2==1~"0 a 2 tragos",a51_2==2~"3 a 4 tragos",
                                            a51_2==3~"5 a 6 tragos",a51_2==4~"7 a 9 tragos",
                                            a51_2==5~"10 o más tragos"),
                             levels=c("0 a 2 tragos","3 a 4 tragos","5 a 6 tragos",
                                      "7 a 9 tragos","10 o más tragos"),
                             ordered=T)) %>% 
    dplyr::mutate(a51_3=factor(dplyr::case_when(a51_3==1~"Nunca",a51_3==2~"Menos de una vez al mes",
                                            a51_3==3~"Mensualmente",a51_3==4~"Semanalmente",
                                            a51_3==5~"Todos o casi todos los días"),
                             levels=c("Nunca","Menos de una vez al mes","Mensualmente",
                                      "Semanalmente","Todos o casi todos los días"),
                             ordered=T)) %>% 
      dplyr::mutate(a46_top_1=factor(dplyr::case_when(a46_top_1=="2"~"Yes",
                                                      a46_top_1=="1"~"No",
                                                      T~NA_character_),
                             levels=c("No","Yes"))) %>%
        dplyr::mutate(a46_top_12=factor(dplyr::case_when(a46_top_12=="2"~"Yes",
                                                      a46_top_12=="1"~"No",
                                                      T~NA_character_),
                             levels=c("No","Yes"))) %>%
        dplyr::mutate(a46_top_13=factor(dplyr::case_when(a46_top_13=="2"~"Yes",
                                                      a46_top_13=="1"~"No",
                                                      T~NA_character_),
                             levels=c("No","Yes")))


audit_1f_inicial_form <- "
audit =~  a51_1+ a51_2+ a51_3
con_psi =~ a46_top_1+ a46_top_12+ a46_top_13
"

audit_1f_inicial<- lavaan::cfa(audit_1f_inicial_form, data = dim_auditc_gral2, ordered=names(dim_auditc_gral), estimator= "WLSMV", warn = T,std.lv=T)

#summary(resil_1f_inicial,fit.measures = TRUE, standardized = TRUE)
data.table::data.table(cbind(modelos= c("2 factores"),
                             rbind(ajusteAFC(audit_1f_inicial))),keep.rownames = F) %>% 
  knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Tabla 3g. Psychometric Properties of Risky Drinking & Psychotropics Medication"),
               align =c('l',rep('c', 101)),
               col.names = c("Modelos","Código", "gl","WLS X2","CMIN/df","aGFI","GFI", "RMSEA\n[IC 90%]","CFit","CFI","NNFI")) %>%
      #kableExtra::row_spec(1, bold= T, color= "black", background= "white")%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 12)
Tabla 3g. Psychometric Properties of Risky Drinking & Psychotropics Medication
Modelos Código gl WLS X2 CMIN/df aGFI GFI RMSEA [IC 90%] CFit CFI NNFI
2 factores audit_1f_inicial 8 16.963 2.120 0.997 0.999 0.024[0.007-0.040] 0.998 0.999 0.999
#Cabe señalar que este es un modelo saturado. Es esperable que ahbiendo calculado sólo una variable latente existan 3 varianzas y 3 covarianzas, por lo que el parámetro a ser estimado sean 3 varianzas de error y 3 cargas factoriales. En este caso, los grados de libertad son 0 (3 elementos - 3 parámetros a estimar).


#Cargas factoriales.
afc_lambda_std_audit<-
    data.table::data.table(lavaan::lavInspect(audit_1f_inicial, what = "std", add.labels = TRUE)$`lambda`,keep.rownames = T) %>% 
      dplyr::mutate(label=dplyr::case_when(grepl("a51_1",rn)~"1.¿Qué tan seguido toma usted alguna bebida alcohólica?",
    grepl("a51_2",rn)~"2.¿Cuántos tragos suele tomar usted en un día típico de consumo de alcohol?",
    grepl("a51_3",rn)~"3.¿Qué tan seguido toma usted 5 o más tragos en una sola ocasión?",
    grepl("a46_top_12",rn)~"5.¿Ayudar a dormir, tales como hipnóticos?",
    grepl("a46_top_13",rn)~"6.¿Remontar el ánimo tales como antidepresivos?",
    grepl("a46_top_1",rn)~"4.¿Disminuir la ansiedad o el nerviosismo, tales como ansiolíticos?"))%>% 
    dplyr::select(label,everything()) %>% 
    dplyr::mutate(across(where(is.numeric), ~ round(.x, 2))) %>% 
    dplyr::mutate(across(where(is.numeric), ~ ifelse(.x==0,NA,.x))) 
for (i in 1:nrow(afc_lambda_std_audit)){
    afc_lambda_std_audit$label[i]<-stringr::str_wrap(afc_lambda_std_audit$label[i], width = 40, indent = 0, exdent = 0)
}

#R cuadrado var latentes
afc_psi_std_audit<-
    cbind.data.frame(name=c("AUDIT","Psychotropic Medication"),
                     data.table::data.table(lavaan::lavInspect(audit_1f_inicial, what = "std", add.labels = TRUE)$psi,keep.rownames = T)) %>% 
    dplyr::mutate(across(where(is.numeric), ~ round(.x, 2))) %>% 
    dplyr::mutate(across(where(is.numeric), ~ ifelse(.x==1,NA,.x))) 

rbind.data.frame(afc_psi_std_audit) %>% 
    knitr::kable(format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
                 caption = paste0("Table 3f. Factor Loadings, AUDIT-c & Psychotropic Medication"),
                 align =c('l',rep('c', 101))) %>% 
                 #col.names = c("Ítem","Código", 
                  #             "Ambiente y Cultura", "Exigencias\nPsicológicas", "Conflicto\nde Rol", "Relación con\nPares y Ext.", "Relación con\nSuperiores"))%>%
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 12) %>%
    kableExtra::add_footnote(c("Nota. "),
                             notation = "none")%>%
    kableExtra::scroll_box(width = "100%", height = "275px")
Table 3f. Factor Loadings, AUDIT-c & Psychotropic Medication
name rn audit con_psi
AUDIT audit -0.05
Psychotropic Medication con_psi -0.05
Nota.
#A factor with 2 variables is only considered reliable when the variables are highly correlated with each another (r > .70) but fairly uncorrelated with other variables. Yong, A. G., & Pearce, S. (2013, p. 80).


#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Para ver el número de parámetros
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

if(no_mostrar==1){
for (i in 2:100){
  ano<-i
  try(semPlot::semPaths(audit_1f_inicial, 
                  residuals=T, 
                  what="std", 
                  label.cex=1.5, 
                  edge.label.cex=1,
                  fade=FALSE,
                  thresholds = F,
                  edge.label.position=rep(.5,ano),
                  filetype="pdf",
                  filename=paste0("Número de parámetros ",ano),
                  intercepts=F)
    )
  }
}
## Error in eval(expr, envir, enclos): objeto 'no_mostrar' no encontrado
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
semPlot::semPaths(audit_1f_inicial, 
                  residuals=T, 
                  what="std", 
                  label.cex=c(rep(2,6),3,4), 
                  edge.label.cex=1,
                  fade=FALSE,
                  thresholds = F,
                  #58 en total
                  edge.label.position=c(rep(c(0.45,0.55),9),
                                        rep(0.5,4)), #10 pars
                  nodeLabels=c("p51_1","p51_2","p51_3",
                               "a46_top_1","a46_top_12","a46_top_13",
                               "AUDIT", "Psy Med"),
                  intercepts=F, 
                  curveAdjacent = TRUE,
                  title=F, 
                  layout="tree3",
                  sizeMan=5, 
                  gui = T, 
                  allVars = FALSE,
                  cut=1.1,
                  sizeLat=5, 
                  edge.color="black",
                  curvePivot=T,
                  borders=FALSE, 
                  edge.width = 0.5, 
                  node.width =1, 
                  node.height= 1,
                  label.scale=T,
                  curve = 1.5,
                  mar = c(5.8, 0.8, 7.8, 0.8))
Figura 2d. Communalities and standardized factor loadings of AUDIT & Psychotropic Medication

Figura 2d. Communalities and standardized factor loadings of AUDIT & Psychotropic Medication

NAQ-R

comp_naq<-na.omit(BD_2020_input3[,paste0("naq",1:22)]) %>% 
  dplyr::mutate_all(~as.numeric(.))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
MVN::mvn(comp_naq, subset = NULL, mvnTest = "mardia", covariance = TRUE, tol = 1e-25, alpha = 0.5,scale = FALSE, desc = TRUE, transform = "none", R = 1000,univariateTest = "Lillie",univariatePlot = "none", multivariatePlot = "none",multivariateOutlierMethod = "none", bc = FALSE, bcType = "rounded",showOutliers = FALSE, showNewData = FALSE)$multivariateNormality%>%
  data.frame()%>%
  dplyr::mutate(across(c("Statistic","p.value"),~ifelse(grepl("NA",.),NA_real_,as.numeric(as.character(.)))))%>%
  dplyr::mutate(Statistic=sprintf("%8.2f",Statistic))%>%
  dplyr::mutate(p.value=sprintf("%5.3f",p.value))%>%
  dplyr::mutate(across(c("Statistic","p.value"),~ifelse(grepl("NA",.),"-",as.character(.))))%>%
   knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 3e. Multivariate Normality, NAQR"),
               col.names = c("Test","Statistic","P value","Normality"),
align =c('l',rep('c', 101)))%>%
    kableExtra::add_footnote(c("Note. Mardia Skewness= Mardia Multivariable Skewness(1970); Mardia Kurtosis= Mardia Kurtosis (1970)"), notation = "none")%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 10)
Table 3e. Multivariate Normality, NAQR
Test Statistic P value Normality
Mardia Skewness 80560.92 0.000 NO
Mardia Kurtosis 718.15 0.000 NO
MVN
NO
Note. Mardia Skewness= Mardia Multivariable Skewness(1970); Mardia Kurtosis= Mardia Kurtosis (1970)
cbind(Models=c("NAQ-R"),
      KMO=c(round(psych::KMO(na.omit(comp_naq))$MSA,2))
) %>% 
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
                 caption = paste0("Table 3f. Keyser-Meyer-Olkin, NAQR"),
                 align =c('l',rep('c', 101)))%>%
    #kableExtra::add_footnote(c("Nota. Los ítems son")), notation = "none")%>%
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 10)  
Table 3f. Keyser-Meyer-Olkin, NAQR
Models KMO
NAQ-R 0.96
naq_gral<-
BD_2020_input3 %>% 
  dplyr::mutate_at(.vars= vars(naq1, naq2, naq3,naq4, naq5, naq6, naq7, naq8, naq9, naq10, naq11, naq12, naq13, naq14, naq15, naq16, naq17, naq18, naq19, naq20, naq21, naq22),.funs = ~as.numeric(.)) %>% 
  dplyr::select(c("naq1", "naq2", "naq3","naq4", "naq5", "naq6", "naq7", "naq8", "naq9", "naq10", "naq11", "naq12", "naq13", "naq14", "naq15", "naq16", "naq17", "naq18", "naq19", "naq20", "naq21", "naq22")) %>% 
 #   mutate_at(paste0("naq",1:22), ~factor(case_when(as.numeric(.)-1==0~"1.Never",
#                                                    as.numeric(.)-1==4~"5.Daily",
#                                                    TRUE~as.character(.)),ordered=T)) %>% 
  na.omit()
attr(naq_gral$naq1, "label") <- 'Alguien le ha ocultado información que ha afectado a su rendimiento. [1. Someone withholding information which affects your performance ]'
attr(naq_gral$naq2, "label") <- 'Ha sido humillado o ridiculizado en relación a su trabajo. [2. Being humiliated or ridiculed in connection with your work]'
attr(naq_gral$naq3, "label") <- 'Le han ordenado realizar un trabajo que está por debajo de su nivel de competencia o calificación. [3. Being ordered to do work below your level of competence]'
attr(naq_gral$naq4, "label") <- 'Le han cambiado de realizar tareas de responsabilidad por otras más triviales o desagradables. [4. Having key areas of responsibility removed or replaced with more trivial or unpleasant tasks]'
attr(naq_gral$naq5, "label") <- 'Se han extendido rumores sobre usted. [5. Spreading of gossip and rumors about you]'
attr(naq_gral$naq6, "label") <- 'Ha sido ignorado, excluido o le han dejado de hablar. [6. Being ignored or excluded]'
attr(naq_gral$naq7, "label") <- 'Le han insultado u ofendido con comentarios sobre usted, sus actitudes o su vida privada. [7. Having insulting or offensive remarks made about your person]'
attr(naq_gral$naq8, "label") <- 'Le han gritado o ha sido objeto de enfados espontáneos. [8. Being shouted at or being the target of spontaneous anger]'
attr(naq_gral$naq9, "label") <- 'Ha sufrido conductas intimidatorias como ser apuntado con el dedo, la invasión de su espacio personal, empujones, que no le dejen pasar, etc. [9. Intimidating behavior such as finger-pointing, invasion of personal space, shoving, blocking/barring the way]'
attr(naq_gral$naq10, "label") <- 'Ha visto detalles o indirectas de otros que le sugieran abandonar su trabajo. [10. Hints or signals from others that you should quit your job]'
attr(naq_gral$naq11, "label") <- 'Le han recordado continuamente sus errores y fallos. [11. Repeated reminders of your errors or mistakes]'
attr(naq_gral$naq12, "label") <- 'Ha sido ignorado o ha recibido una reacción hostil cuando se ha acercado a alguien. [12. Being ignored or facing a hostile reaction when you approach]'
attr(naq_gral$naq13, "label") <- 'Ha recibido críticas persistentes sobre su trabajo y esfuerzo [13. Persistent criticism of your work and effort]'
attr(naq_gral$naq14, "label") <- 'Sus opiniones y puntos de vista han sido ignorados. [14. Having your opinions and views ignored ]'
attr(naq_gral$naq15, "label") <- 'Ha recibido bromas pesadas de gente con la que no se lleva bien. [15. Practical jokes carried out by people you do not get on with]'
attr(naq_gral$naq16, "label") <- 'Le han asignado tareas u objetivos inalcanzables. [16. Being given tasks with unreasonable or impossible targets or deadlines]'
attr(naq_gral$naq17, "label") <- 'Ha recibido acusaciones (reclamos) en su contra. [17. Having allegations made against you]'
attr(naq_gral$naq18, "label") <- 'Ha sido excesivamente supervisado en su trabajo. [18. Excessive monitoring of your work]'
attr(naq_gral$naq19, "label") <- 'Ha sido presionado para no reclamar algo a lo que tiene derecho (p. Ej., licencia por enfermedad, vacaciones, viáticos, etc.) [19. Pressure not to claim something which by right you are entitled to]'
attr(naq_gral$naq20, "label") <- 'Ha sido objeto de numerosas tomaduras de pelo y sarcasmos. [20. Being the subject of excessive teasing and sarcasm]'
attr(naq_gral$naq21, "label") <- 'Ha sido expuesto a una excesiva carga de trabajo. [21. Being exposed to an unmanageable workload]'
attr(naq_gral$naq22, "label") <- 'Ha recibido amenazas de violencia o de abusos físicos. [22. Threats of violence or physical abuse or actual abuse]'

alfa_dim_naq_gral<-psych::alpha(naq_gral,use="complete.obs",n.obs=nrow(naq_gral),check.keys=T)

naq_labs<-data.frame()
for (i in c(1:length(naq_gral))){
naq_labs<-rbind.data.frame(naq_labs,cbind(name=names(naq_gral)[i],lab=attr(naq_gral[[i]],"label")))
}

data.table(alfa_dim_naq_gral$alpha.drop,keep.rownames = T)%>%
  dplyr::left_join(naq_labs,by=c("rn"="name")) %>% 
    dplyr::select(lab, raw_alpha, std.alpha,`G6(smc)`)%>%
    dplyr::mutate_at(vars(raw_alpha, std.alpha,`G6(smc)`),~round(.,2)) %>% 
   knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 3g. Cronbach's Alpha if item is deleted",paste0("(alfa= ",round(alfa_dim_naq_gral$total$std.alpha,2),", SMC= ",round(alfa_dim_naq_gral$total$`G6(smc`,2),")")),
               col.names = c("Item","Raw Alpha","Std. Alpha","SMC"),
align =c('l',rep('c', 101)))%>%
    kableExtra::add_footnote(c("Note. SMC= Multiple Squared Correlations"), notation = "none")%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 12)
Table 3g. Cronbach’s Alpha if item is deleted(alfa= 0.94, SMC= 0.94)
Item Raw Alpha Std. Alpha SMC
Alguien le ha ocultado información que ha afectado a su rendimiento. [1. Someone withholding information which affects your performance ] 0.93 0.94 0.94
Ha sido humillado o ridiculizado en relación a su trabajo. [2. Being humiliated or ridiculed in connection with your work] 0.93 0.94 0.94
Le han ordenado realizar un trabajo que está por debajo de su nivel de competencia o calificación. [3. Being ordered to do work below your level of competence] 0.93 0.94 0.94
Le han cambiado de realizar tareas de responsabilidad por otras más triviales o desagradables. [4. Having key areas of responsibility removed or replaced with more trivial or unpleasant tasks] 0.93 0.94 0.94
Se han extendido rumores sobre usted. [5. Spreading of gossip and rumors about you] 0.93 0.94 0.94
Ha sido ignorado, excluido o le han dejado de hablar. [6. Being ignored or excluded] 0.93 0.93 0.94
Le han insultado u ofendido con comentarios sobre usted, sus actitudes o su vida privada. [7. Having insulting or offensive remarks made about your person] 0.93 0.93 0.94
Le han gritado o ha sido objeto de enfados espontáneos. [8. Being shouted at or being the target of spontaneous anger] 0.93 0.93 0.94
Ha sufrido conductas intimidatorias como ser apuntado con el dedo, la invasión de su espacio personal, empujones, que no le dejen pasar, etc. [9. Intimidating behavior such as finger-pointing, invasion of personal space, shoving, blocking/barring the way] 0.93 0.94 0.94
Ha visto detalles o indirectas de otros que le sugieran abandonar su trabajo. [10. Hints or signals from others that you should quit your job] 0.93 0.93 0.94
Le han recordado continuamente sus errores y fallos. [11. Repeated reminders of your errors or mistakes] 0.93 0.94 0.94
Ha sido ignorado o ha recibido una reacción hostil cuando se ha acercado a alguien. [12. Being ignored or facing a hostile reaction when you approach] 0.93 0.93 0.94
Ha recibido críticas persistentes sobre su trabajo y esfuerzo [13. Persistent criticism of your work and effort] 0.93 0.94 0.94
Sus opiniones y puntos de vista han sido ignorados. [14. Having your opinions and views ignored ] 0.93 0.94 0.94
Ha recibido bromas pesadas de gente con la que no se lleva bien. [15. Practical jokes carried out by people you do not get on with] 0.93 0.94 0.94
Le han asignado tareas u objetivos inalcanzables. [16. Being given tasks with unreasonable or impossible targets or deadlines] 0.93 0.94 0.94
Ha recibido acusaciones (reclamos) en su contra. [17. Having allegations made against you] 0.93 0.93 0.94
Ha sido excesivamente supervisado en su trabajo. [18. Excessive monitoring of your work] 0.93 0.94 0.94
Ha sido presionado para no reclamar algo a lo que tiene derecho (p. Ej., licencia por enfermedad, vacaciones, viáticos, etc.) [19. Pressure not to claim something which by right you are entitled to] 0.93 0.94 0.94
Ha sido objeto de numerosas tomaduras de pelo y sarcasmos. [20. Being the subject of excessive teasing and sarcasm] 0.93 0.94 0.94
Ha sido expuesto a una excesiva carga de trabajo. [21. Being exposed to an unmanageable workload] 0.93 0.94 0.94
Ha recibido amenazas de violencia o de abusos físicos. [22. Threats of violence or physical abuse or actual abuse] 0.93 0.94 0.94
Note. SMC= Multiple Squared Correlations
cbind.data.frame(Models="NAQ-R",data.frame(t(psych::unidim(naq_gral,cor= "poly")$uni)))%>% 
  dplyr::select(`Models`,`u`,`Unidim.A`,`alpha`,`av.r`,`median.r`) %>% 
  dplyr::mutate_if(is.numeric, round, 2) %>% 
   knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 3h. Unidimensionality, NAQ-R"),
               col.names = c("Models","Raw Unidimensionality","Adjusted Unidimensionality","Std. Alpha","Mean of Intraclass correlation coefficients (ICC)", "Median of Intraclass correlation coefficients (ICC)"),
align =c('l',rep('c', 101)))%>%
    #kableExtra::add_footnote(c("Nota. Los ítems son")), notation = "none")%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 13)  
Table 3h. Unidimensionality, NAQ-R
Models Raw Unidimensionality Adjusted Unidimensionality Std. Alpha Mean of Intraclass correlation coefficients (ICC) Median of Intraclass correlation coefficients (ICC)
NAQ-R 0.97 1 0.97 0.58 0.57
##Koo and Li (2016) gives the following suggestion for interpreting ICC (Koo and Li 2016):
#below 0.50: poor
#between 0.50 and 0.75: moderate
#between 0.75 and 0.90: good
#above 0.90: excellent

Due to problems into defining the polychoric correlation, we made 20 attempts until a maximum likelihood solution is found.

library(psych)
library(dplyr)
no_attempts <- 20

comp_naq<-BD_2020_input3[,c(paste0("naq",1:22),"weight")] %>% 
    dplyr::mutate_all(~as.numeric(.)) %>% mutate(rn=row_number()) %>% 
  na.omit()
## mutate: new variable 'rn' (integer) with 1,995 unique values and 0% NA
  r <- NULL
  attempt <- 0
  while( is.null(r) && attempt <= no_attempts ) {
    attempt <- attempt + 1
    try(
      r <- psych::cor.ci(comp_naq[,paste0("naq",1:22)],
                  global=T,ML=T, std.err=F,weight=comp_naq$weight, poly=T,
                  correct=.5,progress=T,na.rm=T, delete=T)
    )
  } 
  naqr_policor_wgt <- r
hetcornaq_df2<-
reshape2::melt(tibble::as_tibble(naqr_policor_wgt$rho,rownames = "rowname")) %>% 
    dplyr::mutate(comb=paste0(substr(rowname,1,5),"-",substr(variable,1,5))) %>% 
    dplyr::mutate(comb2=paste0(substr(variable,1,5),"-",substr(rowname,1,5))) %>% 
    dplyr::left_join(data.table::data.table(round(naqr_policor_wgt$ci[,c("lower","upper")],2),keep.rownames = T),by=c("comb"="rn")) %>% 
    dplyr::left_join(data.table::data.table(round(naqr_policor_wgt$ci[,c("lower","upper")],2),keep.rownames = T),by=c("comb2"="rn")) %>%
  dplyr::mutate(lo=ifelse(is.na(lower.y),lower.x,lower.y)) %>% 
  dplyr::mutate(up=ifelse(is.na(upper.y),upper.x,upper.y)) %>% 
  dplyr::select(-lower.x, -upper.x, -lower.y, -upper.y) %>% 
  dplyr::left_join(naq_labs,by=c("rowname"="name")) %>%
  dplyr::left_join(naq_labs,by=c("variable"="name")) %>%
  dplyr::rename("Var1_lab"="lab.x") %>% 
  dplyr::rename("Var2_lab"="lab.y") %>% 
  dplyr::select(-rowname, -variable)
## Using rowname as id variables
#pd4 <- hetcornaq_df2 %>% 
#  dplyr::mutate(tooltip = paste0('Y: ',Var1_lab, '<br>', 'X: ',Var2_lab, '<br>Rho: ', round(as.numeric(value), 2), 
#                          ' [',lo,"-",up,"]"))
pd4 <- hetcornaq_df2 %>% 
  dplyr::mutate(tooltip = paste0('Y: ',Var1_lab, '\n', 'X: ',Var2_lab, '\nRho: ', round(as.numeric(value), 2), 
                          ' [',lo,"-",up,"]"))

p4 <- ggplot(pd4,aes(text=tooltip) ) +
  geom_tile(aes(Var1_lab,Var2_lab, fill = value,
                            #tooltip = tooltip
                           ), color = "gray")  +
  scale_fill_gradient2(low = "#E46726", high = "#6D9EC1",
                       mid = "white", midpoint = 0, limit = c(-1, 1), space = "Lab",
                       name = "Corr") +
  geom_text(mapping = aes(x = Var1_lab, y = Var2_lab, label = round(value, 2)), size = 2) +
  ggplot2::coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_blank())+
  theme(axis.text.y = element_blank())+
  guides(fill = FALSE) +
  xlab("") + ylab("")

ggplotly(p4, tooltip="text")%>% layout(autosize = F, width = 600, height = 600)

Figure 2d. Polychoric correlation matix of NAQ-R (Survey Weighted)

#Parallel analysis compares factor and principal components solutions to the real data as well as resampled data
poly_fa_naq<-
fa.parallel(naqr_policor_wgt$rho, fm="pa", fa="both", n.obs=nrow(comp_naq), nfactors=1, error.bars=F,n.iter=500,SMC=T,quant=.95,
            sim=T,show.legend=FALSE, main="Scree plot with parallel analysis",cor="poly")
Figure 2e. Parallel Analysis of NAQ-R

Figure 2e. Parallel Analysis of NAQ-R

## Parallel analysis suggests that the number of factors =  7  and the number of components =  2
poly_fa_naq
## Call: fa.parallel(x = naqr_policor_wgt$rho, n.obs = nrow(comp_naq), 
##     fm = "pa", fa = "both", nfactors = 1, main = "Scree plot with parallel analysis", 
##     n.iter = 500, error.bars = F, SMC = T, show.legend = FALSE, 
##     sim = T, quant = 0.95, cor = "poly")
## Parallel analysis suggests that the number of factors =  7  and the number of components =  2 
## 
##  Eigen Values of 
## 
##  eigen values of factors
##  [1] 12.85  0.89  0.72  0.24  0.23  0.19  0.10  0.07  0.05  0.02  0.01 -0.01
## [13] -0.03 -0.04 -0.06 -0.07 -0.09 -0.10 -0.11 -0.13 -0.14 -0.15
## 
##  eigen values of simulated factors
##  [1]  0.21  0.18  0.15  0.13  0.11  0.09  0.08  0.06  0.04  0.03  0.01  0.00
## [13] -0.02 -0.03 -0.04 -0.06 -0.07 -0.09 -0.11 -0.12 -0.14 -0.17
## 
##  eigen values of components 
##  [1] 13.19  1.26  1.08  0.63  0.59  0.52  0.45  0.44  0.41  0.40  0.36  0.33
## [13]  0.32  0.32  0.29  0.26  0.24  0.23  0.21  0.18  0.16  0.14
## 
##  eigen values of simulated components
##  [1] 1.19 1.16 1.14 1.12 1.10 1.08 1.07 1.05 1.03 1.02 1.00 0.99 0.97 0.96 0.95
## [16] 0.93 0.91 0.90 0.88 0.87 0.84 0.82
naq_fa<-
fa(na.omit(comp_naq[,paste0("naq",1:22)]), nfactors = 1,  n.obs=nrow(comp_naq), fm="pa", SMC=T, cor="poly",rotate="oblimin", weight=na.omit(comp_naq)$weight) #

 data.frame(naq_fa$TLI, naq_fa$RMSEA[1], naq_fa$RMSEA[2], naq_fa$RMSEA[3], naq_fa$rms, naq_fa$BIC) %>% 
        mutate(Factors = 1,
               RMSEA_ci = paste0(round(naq_fa.RMSEA.2.,2),", ",round(naq_fa.RMSEA.3.,2))) %>% 
        dplyr::rename(TLI = naq_fa.TLI,
                      RMSEA = naq_fa.RMSEA.1.,
                      "RMSEA CI90%" = RMSEA_ci,
                      SRMR = naq_fa.rms, 
                      BIC = naq_fa.BIC) %>% 
        dplyr::select(Factors, TLI, RMSEA, "RMSEA CI90%", SRMR, BIC) %>% 
      data.table::data.table(keep.rownames = F) %>% 
    dplyr::mutate_at(c(2,3,5),~round(as.numeric(.),2)) %>% 
    dplyr::mutate_at(c(6),~round(as.numeric(.),0)) %>% 
    knitr::kable(format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
                 caption = paste0("Tabla 3e. Fit indices, Exploratory Factor Analysis AUDIT and Psy Medication"),
                 #col.names = c("Prueba","Estadistico","Valor p","Normalidad"),
                 align =c('l',rep('c', 101)))%>%
    kableExtra::add_footnote(c("Nota. TLI= Tucker-Lewis Index; RMSEA= Root Mean Square Error of Approximation; SRMR= Standardized Root Mean Residual; BIC= Bayesian Information Criterion"), notation = "none")%>%
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 12)


Missing variables

#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
library(dplyr)
library(ggplot2)
missing.values<-
BD_2020_input3 %>%
  rowwise %>%
  dplyr::mutate_at(.vars = vars(c(cat_vars2,"a52")),
                   .funs = ~ifelse(is.na(.), 1, 0)) %>% 
  dplyr::ungroup() %>% 
  dplyr::summarise_at(c(cat_vars2,"a52"),~sum(.)) %>% 
  dplyr::ungroup()

for (i in 1:c(length(c(cat_vars2,"a52")))){
  x<-c(cat_vars2,"a52")[i]
attr(missing.values[[x]],"label")<-attr(BD_2020_input3[[x]],"label")
}

df_lab<-data.frame()
for (i in 1:length(c(cat_vars2,"a52"))){
  x<-c(cat_vars2,"autonaq")[i]
  df_lab<- rbind.data.frame(df_lab,cbind(x,attr(BD_2020_input3[[x]],"label")))
  #print(x)
  #print(attr(missing.values[[x]],"label"))
  #attr(missing.values[[x]],"label")
}

limits_missing<-round(max(missing.values)/nrow(BD_2020_input3),2)

plot_miss<-
missing.values %>%
  data.table::melt() %>% 
  dplyr::mutate(perc= value/nrow(BD_2020_input3)) %>% 
  dplyr::left_join(df_lab, by=c("variable"="x")) %>% 
  dplyr::mutate(text= paste0("Variable= ",variable,"<br>n= ",value,"<br>",scales::percent(round(perc,3)),"<br>Label=",V2)) %>%
  dplyr::mutate(perc=perc*100) %>% 
  ggplot() +
  geom_bar(aes(x=factor(variable), y=perc,label= text), stat = 'identity') +
  sjPlot::theme_sjplot()+
#  scale_y_continuous(limits=c(0,1), labels=percent)+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size=9))+
  labs(x=NULL, y="% of Missing Values", caption=paste0("Note. Porcentaje de perdidos del total (",nrow(BD_2020_input3),")"))
  ggplotly(plot_miss, tooltip = c("text"))%>% layout(xaxis= list(showticklabels = T), height = 600, width=800) %>%   layout(yaxis = list(tickformat='%',  range = c(0,limits_missing*100+1)))

Figure 3. Bar plot of Percentage of Missing Values per Variables

  #</div>



The variable that had more missing values only reached a 4% of missing values.


library(Amelia)

miss_vars<-
melt(missing.values) %>% 
    dplyr::filter(value>0)

noms_vars<-as.character(unlist(dplyr::filter(data.frame(cat_vars2),!cat_vars2 %in% c("gse","edad","tamano_de_la_empresa"))))
set.seed(2125)
amelia_fit <- amelia(data.frame(BD_2020_input3[,c("sbj_num",cat_vars2)]), 
                      m=61, 
                      parallel = "multicore", 
                      idvars="sbj_num",
                      noms= noms_vars,
                      ords=c("gse","edad","tamano_de_la_empresa")
                     )


Seniority in Workplace

We also imputed the variable of the Seniority in the workplace (antiguedad), which had a small amount of missing values (n= 2).


# Ver distintos valores propuestos para sustancia de inciio
# names(amelia_fit$imputations$imp1)
antiguedad_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$sbj_num,
       amelia_fit$imputations$imp1$antiguedad,
       amelia_fit$imputations$imp2$antiguedad,
       amelia_fit$imputations$imp3$antiguedad,
       amelia_fit$imputations$imp4$antiguedad,
       amelia_fit$imputations$imp5$antiguedad,
       amelia_fit$imputations$imp6$antiguedad,
       amelia_fit$imputations$imp7$antiguedad,
       amelia_fit$imputations$imp8$antiguedad,
       amelia_fit$imputations$imp9$antiguedad,
       amelia_fit$imputations$imp10$antiguedad,
       amelia_fit$imputations$imp11$antiguedad,
       amelia_fit$imputations$imp12$antiguedad,
       amelia_fit$imputations$imp13$antiguedad,
       amelia_fit$imputations$imp14$antiguedad,
       amelia_fit$imputations$imp15$antiguedad,
       amelia_fit$imputations$imp16$antiguedad,
       amelia_fit$imputations$imp17$antiguedad,
       amelia_fit$imputations$imp18$antiguedad,
       amelia_fit$imputations$imp19$antiguedad,
       amelia_fit$imputations$imp20$antiguedad,
       amelia_fit$imputations$imp21$antiguedad,
       amelia_fit$imputations$imp22$antiguedad,
       amelia_fit$imputations$imp23$antiguedad,
       amelia_fit$imputations$imp24$antiguedad,
       amelia_fit$imputations$imp25$antiguedad,
       amelia_fit$imputations$imp26$antiguedad,
       amelia_fit$imputations$imp27$antiguedad,
       amelia_fit$imputations$imp28$antiguedad,
       amelia_fit$imputations$imp29$antiguedad,
       amelia_fit$imputations$imp30$antiguedad,
       amelia_fit$imputations$imp31$antiguedad,
       amelia_fit$imputations$imp32$antiguedad,
       amelia_fit$imputations$imp33$antiguedad,
       amelia_fit$imputations$imp34$antiguedad,
       amelia_fit$imputations$imp35$antiguedad,
       amelia_fit$imputations$imp36$antiguedad,
       amelia_fit$imputations$imp37$antiguedad,
       amelia_fit$imputations$imp38$antiguedad,
       amelia_fit$imputations$imp39$antiguedad,
       amelia_fit$imputations$imp40$antiguedad,
       amelia_fit$imputations$imp41$antiguedad,
       amelia_fit$imputations$imp42$antiguedad,
       amelia_fit$imputations$imp43$antiguedad,
       amelia_fit$imputations$imp44$antiguedad,
       amelia_fit$imputations$imp45$antiguedad,
       amelia_fit$imputations$imp46$antiguedad,
       amelia_fit$imputations$imp47$antiguedad,
       amelia_fit$imputations$imp48$antiguedad,
       amelia_fit$imputations$imp49$antiguedad,
       amelia_fit$imputations$imp50$antiguedad,
       amelia_fit$imputations$imp51$antiguedad,
       amelia_fit$imputations$imp52$antiguedad,
       amelia_fit$imputations$imp53$antiguedad,
       amelia_fit$imputations$imp54$antiguedad,
       amelia_fit$imputations$imp55$antiguedad,
       amelia_fit$imputations$imp56$antiguedad,
       amelia_fit$imputations$imp57$antiguedad,
       amelia_fit$imputations$imp58$antiguedad,
       amelia_fit$imputations$imp59$antiguedad,
       amelia_fit$imputations$imp60$antiguedad,
       amelia_fit$imputations$imp61$antiguedad
       ) %>% 
  data.frame() %>% 
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.antiguedad:amelia_fit.imputations.imp61.antiguedad),~dplyr::case_when(grepl("More than six months",as.character(.))~1,TRUE~0), .names="yes_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.antiguedad:amelia_fit.imputations.imp61.antiguedad),~dplyr::case_when(grepl("Six months or less",as.character(.))~1,TRUE~0), .names="no_{col}"))%>%
  dplyr::mutate(antiguedad_yes = base::rowSums(dplyr::select(., starts_with("yes_"))))%>%
  dplyr::mutate(antiguedad_no = base::rowSums(dplyr::select(., starts_with("no_"))))%>%
  janitor::clean_names() %>% 
  dplyr::select(amelia_fit_imputations_imp1_sbj_num,antiguedad_yes,antiguedad_no)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

BD_2020_input3_miss1<-
BD_2020_input3 %>% 
   dplyr::left_join(antiguedad_imputed, by=c("sbj_num"="amelia_fit_imputations_imp1_sbj_num")) %>% 
    dplyr::mutate(antiguedad=factor(dplyr::case_when(is.na(antiguedad) & antiguedad_yes>antiguedad_no~"More than six months",
                                                  is.na(antiguedad) & antiguedad_yes<antiguedad_no~"Six months or less",
                                                  TRUE~as.character(antiguedad)))) %>% 
  data.frame()
 
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#table(BD_2020_input3_miss2$antiguedad,exclude = NULL)

if(nrow(BD_2020_input3_miss1)-nrow(BD_2020_input3)>0){
  warning("AGS: Some rows were added in the imputation")}


We ended having 0 cases with missing values in this variable.


Customer service

We also imputed the variable of whether a participant (atpub), which had a small amount of missing values (n= 2).


# Ver distintos valores propuestos para sustancia de inciio
# names(amelia_fit$imputations$imp1)
atpub_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$sbj_num,
       amelia_fit$imputations$imp1$atpub,
       amelia_fit$imputations$imp2$atpub,
       amelia_fit$imputations$imp3$atpub,
       amelia_fit$imputations$imp4$atpub,
       amelia_fit$imputations$imp5$atpub,
       amelia_fit$imputations$imp6$atpub,
       amelia_fit$imputations$imp7$atpub,
       amelia_fit$imputations$imp8$atpub,
       amelia_fit$imputations$imp9$atpub,
       amelia_fit$imputations$imp10$atpub,
       amelia_fit$imputations$imp11$atpub,
       amelia_fit$imputations$imp12$atpub,
       amelia_fit$imputations$imp13$atpub,
       amelia_fit$imputations$imp14$atpub,
       amelia_fit$imputations$imp15$atpub,
       amelia_fit$imputations$imp16$atpub,
       amelia_fit$imputations$imp17$atpub,
       amelia_fit$imputations$imp18$atpub,
       amelia_fit$imputations$imp19$atpub,
       amelia_fit$imputations$imp20$atpub,
       amelia_fit$imputations$imp21$atpub,
       amelia_fit$imputations$imp22$atpub,
       amelia_fit$imputations$imp23$atpub,
       amelia_fit$imputations$imp24$atpub,
       amelia_fit$imputations$imp25$atpub,
       amelia_fit$imputations$imp26$atpub,
       amelia_fit$imputations$imp27$atpub,
       amelia_fit$imputations$imp28$atpub,
       amelia_fit$imputations$imp29$atpub,
       amelia_fit$imputations$imp30$atpub,
       amelia_fit$imputations$imp31$atpub,
       amelia_fit$imputations$imp32$atpub,
       amelia_fit$imputations$imp33$atpub,
       amelia_fit$imputations$imp34$atpub,
       amelia_fit$imputations$imp35$atpub,
       amelia_fit$imputations$imp36$atpub,
       amelia_fit$imputations$imp37$atpub,
       amelia_fit$imputations$imp38$atpub,
       amelia_fit$imputations$imp39$atpub,
       amelia_fit$imputations$imp40$atpub,
       amelia_fit$imputations$imp41$atpub,
       amelia_fit$imputations$imp42$atpub,
       amelia_fit$imputations$imp43$atpub,
       amelia_fit$imputations$imp44$atpub,
       amelia_fit$imputations$imp45$atpub,
       amelia_fit$imputations$imp46$atpub,
       amelia_fit$imputations$imp47$atpub,
       amelia_fit$imputations$imp48$atpub,
       amelia_fit$imputations$imp49$atpub,
       amelia_fit$imputations$imp50$atpub,
       amelia_fit$imputations$imp51$atpub,
       amelia_fit$imputations$imp52$atpub,
       amelia_fit$imputations$imp53$atpub,
       amelia_fit$imputations$imp54$atpub,
       amelia_fit$imputations$imp55$atpub,
       amelia_fit$imputations$imp56$atpub,
       amelia_fit$imputations$imp57$atpub,
       amelia_fit$imputations$imp58$atpub,
       amelia_fit$imputations$imp59$atpub,
       amelia_fit$imputations$imp60$atpub,
       amelia_fit$imputations$imp61$atpub
       ) %>% 
  data.frame() %>% 
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.atpub:amelia_fit.imputations.imp61.atpub),~dplyr::case_when(grepl("Customer service job",as.character(.))~1,TRUE~0), .names="yes_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.atpub:amelia_fit.imputations.imp61.atpub),~dplyr::case_when(grepl("Job w/o customer service",as.character(.))~1,TRUE~0), .names="no_{col}"))%>%
  dplyr::mutate(atpub_yes = base::rowSums(dplyr::select(., starts_with("yes_"))))%>%
  dplyr::mutate(atpub_no = base::rowSums(dplyr::select(., starts_with("no_"))))%>%
  janitor::clean_names() %>% 
  dplyr::select(amelia_fit_imputations_imp1_sbj_num,atpub_yes,atpub_no)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

BD_2020_input3_miss2<-
BD_2020_input3_miss1 %>% 
   dplyr::left_join(atpub_imputed, by=c("sbj_num"="amelia_fit_imputations_imp1_sbj_num")) %>% 
    dplyr::mutate(atpub=factor(dplyr::case_when(is.na(atpub) & atpub_yes>atpub_no~"Customer service job",
                                                  is.na(atpub) & atpub_yes<atpub_no~"Job w/o customer service",
                                                  TRUE~as.character(atpub)))) %>% 
  data.frame()
 
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#table(BD_2020_input3_miss2$atpub,exclude = NULL)

if(nrow(BD_2020_input3_miss2)-nrow(BD_2020_input3_miss1)>0){
  warning("AGS: Some rows were added in the imputation")}


As a result of the imputations, there were no missing values.


Private Sector

We also imputed the variable of the sector of the workplace (sect_priv), which had a small amount of missing values (n= 23).


# Ver distintos valores propuestos para sustancia de inciio
# names(amelia_fit$imputations$imp1)
sect_priv_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$sbj_num,
       amelia_fit$imputations$imp1$sect_priv,
       amelia_fit$imputations$imp2$sect_priv,
       amelia_fit$imputations$imp3$sect_priv,
       amelia_fit$imputations$imp4$sect_priv,
       amelia_fit$imputations$imp5$sect_priv,
       amelia_fit$imputations$imp6$sect_priv,
       amelia_fit$imputations$imp7$sect_priv,
       amelia_fit$imputations$imp8$sect_priv,
       amelia_fit$imputations$imp9$sect_priv,
       amelia_fit$imputations$imp10$sect_priv,
       amelia_fit$imputations$imp11$sect_priv,
       amelia_fit$imputations$imp12$sect_priv,
       amelia_fit$imputations$imp13$sect_priv,
       amelia_fit$imputations$imp14$sect_priv,
       amelia_fit$imputations$imp15$sect_priv,
       amelia_fit$imputations$imp16$sect_priv,
       amelia_fit$imputations$imp17$sect_priv,
       amelia_fit$imputations$imp18$sect_priv,
       amelia_fit$imputations$imp19$sect_priv,
       amelia_fit$imputations$imp20$sect_priv,
       amelia_fit$imputations$imp21$sect_priv,
       amelia_fit$imputations$imp22$sect_priv,
       amelia_fit$imputations$imp23$sect_priv,
       amelia_fit$imputations$imp24$sect_priv,
       amelia_fit$imputations$imp25$sect_priv,
       amelia_fit$imputations$imp26$sect_priv,
       amelia_fit$imputations$imp27$sect_priv,
       amelia_fit$imputations$imp28$sect_priv,
       amelia_fit$imputations$imp29$sect_priv,
       amelia_fit$imputations$imp30$sect_priv,
       amelia_fit$imputations$imp31$sect_priv,
       amelia_fit$imputations$imp32$sect_priv,
       amelia_fit$imputations$imp33$sect_priv,
       amelia_fit$imputations$imp34$sect_priv,
       amelia_fit$imputations$imp35$sect_priv,
       amelia_fit$imputations$imp36$sect_priv,
       amelia_fit$imputations$imp37$sect_priv,
       amelia_fit$imputations$imp38$sect_priv,
       amelia_fit$imputations$imp39$sect_priv,
       amelia_fit$imputations$imp40$sect_priv,
       amelia_fit$imputations$imp41$sect_priv,
       amelia_fit$imputations$imp42$sect_priv,
       amelia_fit$imputations$imp43$sect_priv,
       amelia_fit$imputations$imp44$sect_priv,
       amelia_fit$imputations$imp45$sect_priv,
       amelia_fit$imputations$imp46$sect_priv,
       amelia_fit$imputations$imp47$sect_priv,
       amelia_fit$imputations$imp48$sect_priv,
       amelia_fit$imputations$imp49$sect_priv,
       amelia_fit$imputations$imp50$sect_priv,
       amelia_fit$imputations$imp51$sect_priv,
       amelia_fit$imputations$imp52$sect_priv,
       amelia_fit$imputations$imp53$sect_priv,
       amelia_fit$imputations$imp54$sect_priv,
       amelia_fit$imputations$imp55$sect_priv,
       amelia_fit$imputations$imp56$sect_priv,
       amelia_fit$imputations$imp57$sect_priv,
       amelia_fit$imputations$imp58$sect_priv,
       amelia_fit$imputations$imp59$sect_priv,
       amelia_fit$imputations$imp60$sect_priv,
       amelia_fit$imputations$imp61$sect_priv
       ) %>% 
  data.frame() %>% 
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.sect_priv:amelia_fit.imputations.imp61.sect_priv),~dplyr::case_when(grepl("Private sector",as.character(.))~1,TRUE~0), .names="yes_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.sect_priv:amelia_fit.imputations.imp61.sect_priv),~dplyr::case_when(grepl("Public sector",as.character(.))~1,TRUE~0), .names="no_{col}"))%>%
  dplyr::mutate(sect_priv_yes = base::rowSums(dplyr::select(., starts_with("yes_"))))%>%
  dplyr::mutate(sect_priv_no = base::rowSums(dplyr::select(., starts_with("no_"))))%>%
  janitor::clean_names() %>% 
  dplyr::select(amelia_fit_imputations_imp1_sbj_num,sect_priv_yes,sect_priv_no)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

BD_2020_input3_miss3<-
BD_2020_input3_miss2 %>% 
   dplyr::left_join(sect_priv_imputed, by=c("sbj_num"="amelia_fit_imputations_imp1_sbj_num")) %>% 
    dplyr::mutate(sect_priv=factor(dplyr::case_when(is.na(sect_priv) & sect_priv_yes>sect_priv_no~"Private sector",
                                                  is.na(sect_priv) & sect_priv_yes<sect_priv_no~"Public sector",
                                                  TRUE~as.character(sect_priv)))) %>% 
  data.frame()
 
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#table(BD_2020_input3_miss2$salgen,exclude = NULL)

if(nrow(BD_2020_input3_miss3)-nrow(BD_2020_input3_miss2)>0){
  warning("AGS: Some rows were added in the imputation")}


As a result of the imputations, there were no missing values.


Size of the Workplace

We also imputed the variable relative to the Size of the Workplace (tamano_de_la_empresa), which had a small amount of missing values (n= 29). In case of ties, we replaced values with the lowest number of workers, reproducing the distribution of the variable.


# Ver distintos valores propuestos para sustancia de inciio
# names(amelia_fit$imputations$imp1)
tamano_de_la_empresa_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$sbj_num,
       amelia_fit$imputations$imp1$tamano_de_la_empresa,
       amelia_fit$imputations$imp2$tamano_de_la_empresa,
       amelia_fit$imputations$imp3$tamano_de_la_empresa,
       amelia_fit$imputations$imp4$tamano_de_la_empresa,
       amelia_fit$imputations$imp5$tamano_de_la_empresa,
       amelia_fit$imputations$imp6$tamano_de_la_empresa,
       amelia_fit$imputations$imp7$tamano_de_la_empresa,
       amelia_fit$imputations$imp8$tamano_de_la_empresa,
       amelia_fit$imputations$imp9$tamano_de_la_empresa,
       amelia_fit$imputations$imp10$tamano_de_la_empresa,
       amelia_fit$imputations$imp11$tamano_de_la_empresa,
       amelia_fit$imputations$imp12$tamano_de_la_empresa,
       amelia_fit$imputations$imp13$tamano_de_la_empresa,
       amelia_fit$imputations$imp14$tamano_de_la_empresa,
       amelia_fit$imputations$imp15$tamano_de_la_empresa,
       amelia_fit$imputations$imp16$tamano_de_la_empresa,
       amelia_fit$imputations$imp17$tamano_de_la_empresa,
       amelia_fit$imputations$imp18$tamano_de_la_empresa,
       amelia_fit$imputations$imp19$tamano_de_la_empresa,
       amelia_fit$imputations$imp20$tamano_de_la_empresa,
       amelia_fit$imputations$imp21$tamano_de_la_empresa,
       amelia_fit$imputations$imp22$tamano_de_la_empresa,
       amelia_fit$imputations$imp23$tamano_de_la_empresa,
       amelia_fit$imputations$imp24$tamano_de_la_empresa,
       amelia_fit$imputations$imp25$tamano_de_la_empresa,
       amelia_fit$imputations$imp26$tamano_de_la_empresa,
       amelia_fit$imputations$imp27$tamano_de_la_empresa,
       amelia_fit$imputations$imp28$tamano_de_la_empresa,
       amelia_fit$imputations$imp29$tamano_de_la_empresa,
       amelia_fit$imputations$imp30$tamano_de_la_empresa,
       amelia_fit$imputations$imp31$tamano_de_la_empresa,
       amelia_fit$imputations$imp32$tamano_de_la_empresa,
       amelia_fit$imputations$imp33$tamano_de_la_empresa,
       amelia_fit$imputations$imp34$tamano_de_la_empresa,
       amelia_fit$imputations$imp35$tamano_de_la_empresa,
       amelia_fit$imputations$imp36$tamano_de_la_empresa,
       amelia_fit$imputations$imp37$tamano_de_la_empresa,
       amelia_fit$imputations$imp38$tamano_de_la_empresa,
       amelia_fit$imputations$imp39$tamano_de_la_empresa,
       amelia_fit$imputations$imp40$tamano_de_la_empresa,
       amelia_fit$imputations$imp41$tamano_de_la_empresa,
       amelia_fit$imputations$imp42$tamano_de_la_empresa,
       amelia_fit$imputations$imp43$tamano_de_la_empresa,
       amelia_fit$imputations$imp44$tamano_de_la_empresa,
       amelia_fit$imputations$imp45$tamano_de_la_empresa,
       amelia_fit$imputations$imp46$tamano_de_la_empresa,
       amelia_fit$imputations$imp47$tamano_de_la_empresa,
       amelia_fit$imputations$imp48$tamano_de_la_empresa,
       amelia_fit$imputations$imp49$tamano_de_la_empresa,
       amelia_fit$imputations$imp50$tamano_de_la_empresa,
       amelia_fit$imputations$imp51$tamano_de_la_empresa,
       amelia_fit$imputations$imp52$tamano_de_la_empresa,
       amelia_fit$imputations$imp53$tamano_de_la_empresa,
       amelia_fit$imputations$imp54$tamano_de_la_empresa,
       amelia_fit$imputations$imp55$tamano_de_la_empresa,
       amelia_fit$imputations$imp56$tamano_de_la_empresa,
       amelia_fit$imputations$imp57$tamano_de_la_empresa,
       amelia_fit$imputations$imp58$tamano_de_la_empresa,
       amelia_fit$imputations$imp59$tamano_de_la_empresa,
       amelia_fit$imputations$imp60$tamano_de_la_empresa,
       amelia_fit$imputations$imp61$tamano_de_la_empresa
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$sbj_num") %>% 
  janitor::clean_names() %>% 
  dplyr::arrange(amelia_fit_imputations_imp1_sbj_num) %>% 
  dplyr::ungroup() %>% 
  #:#:#:#:#:
  dplyr::count(amelia_fit_imputations_imp1_sbj_num ,value) %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_sbj_num) %>% 
  dplyr::slice_max(n, n = 1) %>% 
  dplyr::summarise(one_ten=sum(value == "1 to 10",na.rm=T),
                    eleven_fourtyni=sum(value == "11 to 49",na.rm=T),
                    fifty=sum(value == "50 to 199",na.rm=T),
                    twohun=sum(value == "200 or more",na.rm=T)) %>% 
  rowwise() %>% 
  dplyr::mutate(ties=sum(c_across(one_ten:twohun)),ties=ifelse(ties>1,1,0)) %>% 
  #dplyr::filter(ties==1) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(tamano_de_la_empresa_imp= dplyr::case_when(
   ties==0 & one_ten==1~"1 to 10",
   ties==0 & eleven_fourtyni==1~"11 to 49",    
   ties==0 & fifty==1~"50 to 199",
   ties==0 & twohun==1~"200 or more",
   ties==1 & one_ten==1~"1 to 10",
   ties==1 & one_ten==0 & eleven_fourtyni==1~"11 to 49",    
   ties==1 & one_ten==0 & eleven_fourtyni==0 & fifty==1~"50 to 199",    
   ties==1 & one_ten==0 & eleven_fourtyni==0 & fifty==0 & twohun==1~"200 or more"
  ))
   # $3035303 
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

BD_2020_input3_miss4<-
BD_2020_input3_miss3 %>% 
   dplyr::left_join(tamano_de_la_empresa_imputed[,c("amelia_fit_imputations_imp1_sbj_num","tamano_de_la_empresa_imp")], by=c("sbj_num"="amelia_fit_imputations_imp1_sbj_num")) %>% 
    dplyr::mutate(tamano_de_la_empresa=factor(dplyr::case_when(is.na(tamano_de_la_empresa)~tamano_de_la_empresa_imp,
                                                  TRUE~as.character(tamano_de_la_empresa)),
                  levels=c("1 to 10","11 to 49","50 to 199","200 or more"),
                  ordered=T)) %>% 
  data.frame()
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#table(BD_2020_input3_miss2$depre,exclude = NULL)

if(nrow(BD_2020_input3_miss4)-nrow(BD_2020_input3_miss3)>0){
  warning("AGS: Some rows were added in the imputation")}


As a result of the imputations, there were no missing values.


Qualification standards of workers

We also imputed the single-item question whether the participant could be classified as an Unskilled Worker (Plant and Machine Operators, Technicians, Transport Laborers & Unskilled Workers vs. Clerks, Sales Workers, Skilled Service Workers, Managers & Professionals) (ocuprev_nocalif), which had a small amount of missing values (n= 4).


# Ver distintos valores propuestos para sustancia de inciio
# names(amelia_fit$imputations$imp1)
ocuprev_nocalif_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$sbj_num,
       amelia_fit$imputations$imp1$ocuprev_nocalif,
       amelia_fit$imputations$imp2$ocuprev_nocalif,
       amelia_fit$imputations$imp3$ocuprev_nocalif,
       amelia_fit$imputations$imp4$ocuprev_nocalif,
       amelia_fit$imputations$imp5$ocuprev_nocalif,
       amelia_fit$imputations$imp6$ocuprev_nocalif,
       amelia_fit$imputations$imp7$ocuprev_nocalif,
       amelia_fit$imputations$imp8$ocuprev_nocalif,
       amelia_fit$imputations$imp9$ocuprev_nocalif,
       amelia_fit$imputations$imp10$ocuprev_nocalif,
       amelia_fit$imputations$imp11$ocuprev_nocalif,
       amelia_fit$imputations$imp12$ocuprev_nocalif,
       amelia_fit$imputations$imp13$ocuprev_nocalif,
       amelia_fit$imputations$imp14$ocuprev_nocalif,
       amelia_fit$imputations$imp15$ocuprev_nocalif,
       amelia_fit$imputations$imp16$ocuprev_nocalif,
       amelia_fit$imputations$imp17$ocuprev_nocalif,
       amelia_fit$imputations$imp18$ocuprev_nocalif,
       amelia_fit$imputations$imp19$ocuprev_nocalif,
       amelia_fit$imputations$imp20$ocuprev_nocalif,
       amelia_fit$imputations$imp21$ocuprev_nocalif,
       amelia_fit$imputations$imp22$ocuprev_nocalif,
       amelia_fit$imputations$imp23$ocuprev_nocalif,
       amelia_fit$imputations$imp24$ocuprev_nocalif,
       amelia_fit$imputations$imp25$ocuprev_nocalif,
       amelia_fit$imputations$imp26$ocuprev_nocalif,
       amelia_fit$imputations$imp27$ocuprev_nocalif,
       amelia_fit$imputations$imp28$ocuprev_nocalif,
       amelia_fit$imputations$imp29$ocuprev_nocalif,
       amelia_fit$imputations$imp30$ocuprev_nocalif,
       amelia_fit$imputations$imp31$ocuprev_nocalif,
       amelia_fit$imputations$imp32$ocuprev_nocalif,
       amelia_fit$imputations$imp33$ocuprev_nocalif,
       amelia_fit$imputations$imp34$ocuprev_nocalif,
       amelia_fit$imputations$imp35$ocuprev_nocalif,
       amelia_fit$imputations$imp36$ocuprev_nocalif,
       amelia_fit$imputations$imp37$ocuprev_nocalif,
       amelia_fit$imputations$imp38$ocuprev_nocalif,
       amelia_fit$imputations$imp39$ocuprev_nocalif,
       amelia_fit$imputations$imp40$ocuprev_nocalif,
       amelia_fit$imputations$imp41$ocuprev_nocalif,
       amelia_fit$imputations$imp42$ocuprev_nocalif,
       amelia_fit$imputations$imp43$ocuprev_nocalif,
       amelia_fit$imputations$imp44$ocuprev_nocalif,
       amelia_fit$imputations$imp45$ocuprev_nocalif,
       amelia_fit$imputations$imp46$ocuprev_nocalif,
       amelia_fit$imputations$imp47$ocuprev_nocalif,
       amelia_fit$imputations$imp48$ocuprev_nocalif,
       amelia_fit$imputations$imp49$ocuprev_nocalif,
       amelia_fit$imputations$imp50$ocuprev_nocalif,
       amelia_fit$imputations$imp51$ocuprev_nocalif,
       amelia_fit$imputations$imp52$ocuprev_nocalif,
       amelia_fit$imputations$imp53$ocuprev_nocalif,
       amelia_fit$imputations$imp54$ocuprev_nocalif,
       amelia_fit$imputations$imp55$ocuprev_nocalif,
       amelia_fit$imputations$imp56$ocuprev_nocalif,
       amelia_fit$imputations$imp57$ocuprev_nocalif,
       amelia_fit$imputations$imp58$ocuprev_nocalif,
       amelia_fit$imputations$imp59$ocuprev_nocalif,
       amelia_fit$imputations$imp60$ocuprev_nocalif,
       amelia_fit$imputations$imp61$ocuprev_nocalif
       ) %>% 
  data.frame() %>% 
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.ocuprev_nocalif:amelia_fit.imputations.imp61.ocuprev_nocalif),~dplyr::case_when(grepl("Yes",as.character(.))~1,TRUE~0), .names="yes_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.ocuprev_nocalif:amelia_fit.imputations.imp61.ocuprev_nocalif),~dplyr::case_when(grepl("No",as.character(.))~1,TRUE~0), .names="no_{col}"))%>%
  dplyr::mutate(ocuprev_nocalif_yes = base::rowSums(dplyr::select(., starts_with("yes_"))))%>%
  dplyr::mutate(ocuprev_nocalif_no = base::rowSums(dplyr::select(., starts_with("no_"))))%>%
  janitor::clean_names() %>% 
  dplyr::select(amelia_fit_imputations_imp1_sbj_num,ocuprev_nocalif_yes,ocuprev_nocalif_no)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

BD_2020_input3_miss5<-
BD_2020_input3_miss4 %>% 
   dplyr::left_join(ocuprev_nocalif_imputed, by=c("sbj_num"="amelia_fit_imputations_imp1_sbj_num")) %>% 
    dplyr::mutate(ocuprev_nocalif=factor(dplyr::case_when(is.na(ocuprev_nocalif) & ocuprev_nocalif_yes>ocuprev_nocalif_no~"Yes",
                                                  is.na(ocuprev_nocalif) & ocuprev_nocalif_yes<ocuprev_nocalif_no~"No",
                                                  TRUE~as.character(ocuprev_nocalif)))) %>% 
  data.frame()
 
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#table(BD_2020_input3_miss2$psicotrop,exclude = NULL)

if(nrow(BD_2020_input3_miss5)-nrow(BD_2020_input3_miss4)>0){
  warning("AGS: Some rows were added in the imputation")}


As a result of the imputations, there were no missing values.


Supervises the tasks of other workers

We also imputed the single-item question whether the participant Supervises the tasks of other workers (superv), which had a small amount of missing values (n= 9).


# names(amelia_fit$imputations$imp1)
superv_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$sbj_num,
       amelia_fit$imputations$imp1$superv,
       amelia_fit$imputations$imp2$superv,
       amelia_fit$imputations$imp3$superv,
       amelia_fit$imputations$imp4$superv,
       amelia_fit$imputations$imp5$superv,
       amelia_fit$imputations$imp6$superv,
       amelia_fit$imputations$imp7$superv,
       amelia_fit$imputations$imp8$superv,
       amelia_fit$imputations$imp9$superv,
       amelia_fit$imputations$imp10$superv,
       amelia_fit$imputations$imp11$superv,
       amelia_fit$imputations$imp12$superv,
       amelia_fit$imputations$imp13$superv,
       amelia_fit$imputations$imp14$superv,
       amelia_fit$imputations$imp15$superv,
       amelia_fit$imputations$imp16$superv,
       amelia_fit$imputations$imp17$superv,
       amelia_fit$imputations$imp18$superv,
       amelia_fit$imputations$imp19$superv,
       amelia_fit$imputations$imp20$superv,
       amelia_fit$imputations$imp21$superv,
       amelia_fit$imputations$imp22$superv,
       amelia_fit$imputations$imp23$superv,
       amelia_fit$imputations$imp24$superv,
       amelia_fit$imputations$imp25$superv,
       amelia_fit$imputations$imp26$superv,
       amelia_fit$imputations$imp27$superv,
       amelia_fit$imputations$imp28$superv,
       amelia_fit$imputations$imp29$superv,
       amelia_fit$imputations$imp30$superv,
       amelia_fit$imputations$imp31$superv,
       amelia_fit$imputations$imp32$superv,
       amelia_fit$imputations$imp33$superv,
       amelia_fit$imputations$imp34$superv,
       amelia_fit$imputations$imp35$superv,
       amelia_fit$imputations$imp36$superv,
       amelia_fit$imputations$imp37$superv,
       amelia_fit$imputations$imp38$superv,
       amelia_fit$imputations$imp39$superv,
       amelia_fit$imputations$imp40$superv,
       amelia_fit$imputations$imp41$superv,
       amelia_fit$imputations$imp42$superv,
       amelia_fit$imputations$imp43$superv,
       amelia_fit$imputations$imp44$superv,
       amelia_fit$imputations$imp45$superv,
       amelia_fit$imputations$imp46$superv,
       amelia_fit$imputations$imp47$superv,
       amelia_fit$imputations$imp48$superv,
       amelia_fit$imputations$imp49$superv,
       amelia_fit$imputations$imp50$superv,
       amelia_fit$imputations$imp51$superv,
       amelia_fit$imputations$imp52$superv,
       amelia_fit$imputations$imp53$superv,
       amelia_fit$imputations$imp54$superv,
       amelia_fit$imputations$imp55$superv,
       amelia_fit$imputations$imp56$superv,
       amelia_fit$imputations$imp57$superv,
       amelia_fit$imputations$imp58$superv,
       amelia_fit$imputations$imp59$superv,
       amelia_fit$imputations$imp60$superv,
       amelia_fit$imputations$imp61$superv
       ) %>% 
  data.frame() %>% 
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.superv:amelia_fit.imputations.imp61.superv),~dplyr::case_when(grepl("Yes",as.character(.))~1,TRUE~0), .names="yes_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.superv:amelia_fit.imputations.imp61.superv),~dplyr::case_when(grepl("No",as.character(.))~1,TRUE~0), .names="no_{col}"))%>%
  dplyr::mutate(superv_yes = base::rowSums(dplyr::select(., starts_with("yes_"))))%>%
  dplyr::mutate(superv_no = base::rowSums(dplyr::select(., starts_with("no_"))))%>%
  janitor::clean_names() %>% 
  dplyr::select(amelia_fit_imputations_imp1_sbj_num,superv_yes,superv_no)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

BD_2020_input3_miss6<-
BD_2020_input3_miss5 %>% 
   dplyr::left_join(superv_imputed, by=c("sbj_num"="amelia_fit_imputations_imp1_sbj_num")) %>% 
    dplyr::mutate(superv=factor(dplyr::case_when(is.na(superv) & superv_yes>superv_no~"Yes",
                                                  is.na(superv) & superv_yes<superv_no~"No",
                                                  TRUE~as.character(superv)))) %>% 
  data.frame()
 
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#table(BD_2020_input3_miss2$superv,exclude = NULL)

if(nrow(BD_2020_input3_miss6)-nrow(BD_2020_input3_miss5)>0){
  warning("AGS: Some rows were added in the imputation")}


As a result of the imputations, there were no missing values.


Head of Household

We also imputed the single-item question whether the participant is the Head of Household (jef_hogar), which had a small amount of missing values (n= 1).


# Ver distintos valores propuestos para sustancia de inciio
# names(amelia_fit$imputations$imp1)
jef_hogar_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$sbj_num,
       amelia_fit$imputations$imp1$jef_hogar,
       amelia_fit$imputations$imp2$jef_hogar,
       amelia_fit$imputations$imp3$jef_hogar,
       amelia_fit$imputations$imp4$jef_hogar,
       amelia_fit$imputations$imp5$jef_hogar,
       amelia_fit$imputations$imp6$jef_hogar,
       amelia_fit$imputations$imp7$jef_hogar,
       amelia_fit$imputations$imp8$jef_hogar,
       amelia_fit$imputations$imp9$jef_hogar,
       amelia_fit$imputations$imp10$jef_hogar,
       amelia_fit$imputations$imp11$jef_hogar,
       amelia_fit$imputations$imp12$jef_hogar,
       amelia_fit$imputations$imp13$jef_hogar,
       amelia_fit$imputations$imp14$jef_hogar,
       amelia_fit$imputations$imp15$jef_hogar,
       amelia_fit$imputations$imp16$jef_hogar,
       amelia_fit$imputations$imp17$jef_hogar,
       amelia_fit$imputations$imp18$jef_hogar,
       amelia_fit$imputations$imp19$jef_hogar,
       amelia_fit$imputations$imp20$jef_hogar,
       amelia_fit$imputations$imp21$jef_hogar,
       amelia_fit$imputations$imp22$jef_hogar,
       amelia_fit$imputations$imp23$jef_hogar,
       amelia_fit$imputations$imp24$jef_hogar,
       amelia_fit$imputations$imp25$jef_hogar,
       amelia_fit$imputations$imp26$jef_hogar,
       amelia_fit$imputations$imp27$jef_hogar,
       amelia_fit$imputations$imp28$jef_hogar,
       amelia_fit$imputations$imp29$jef_hogar,
       amelia_fit$imputations$imp30$jef_hogar,
       amelia_fit$imputations$imp31$jef_hogar,
       amelia_fit$imputations$imp32$jef_hogar,
       amelia_fit$imputations$imp33$jef_hogar,
       amelia_fit$imputations$imp34$jef_hogar,
       amelia_fit$imputations$imp35$jef_hogar,
       amelia_fit$imputations$imp36$jef_hogar,
       amelia_fit$imputations$imp37$jef_hogar,
       amelia_fit$imputations$imp38$jef_hogar,
       amelia_fit$imputations$imp39$jef_hogar,
       amelia_fit$imputations$imp40$jef_hogar,
       amelia_fit$imputations$imp41$jef_hogar,
       amelia_fit$imputations$imp42$jef_hogar,
       amelia_fit$imputations$imp43$jef_hogar,
       amelia_fit$imputations$imp44$jef_hogar,
       amelia_fit$imputations$imp45$jef_hogar,
       amelia_fit$imputations$imp46$jef_hogar,
       amelia_fit$imputations$imp47$jef_hogar,
       amelia_fit$imputations$imp48$jef_hogar,
       amelia_fit$imputations$imp49$jef_hogar,
       amelia_fit$imputations$imp50$jef_hogar,
       amelia_fit$imputations$imp51$jef_hogar,
       amelia_fit$imputations$imp52$jef_hogar,
       amelia_fit$imputations$imp53$jef_hogar,
       amelia_fit$imputations$imp54$jef_hogar,
       amelia_fit$imputations$imp55$jef_hogar,
       amelia_fit$imputations$imp56$jef_hogar,
       amelia_fit$imputations$imp57$jef_hogar,
       amelia_fit$imputations$imp58$jef_hogar,
       amelia_fit$imputations$imp59$jef_hogar,
       amelia_fit$imputations$imp60$jef_hogar,
       amelia_fit$imputations$imp61$jef_hogar
       ) %>% 
  data.frame() %>% 
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.jef_hogar:amelia_fit.imputations.imp61.jef_hogar),~dplyr::case_when(grepl("Yes",as.character(.))~1,TRUE~0), .names="yes_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.jef_hogar:amelia_fit.imputations.imp61.jef_hogar),~dplyr::case_when(grepl("No",as.character(.))~1,TRUE~0), .names="no_{col}"))%>%
  dplyr::mutate(jef_hogar_yes = base::rowSums(dplyr::select(., starts_with("yes_"))))%>%
  dplyr::mutate(jef_hogar_no = base::rowSums(dplyr::select(., starts_with("no_"))))%>%
  janitor::clean_names() %>% 
  dplyr::select(amelia_fit_imputations_imp1_sbj_num,jef_hogar_yes,jef_hogar_no)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

BD_2020_input3_miss7<-
BD_2020_input3_miss6 %>% 
   dplyr::left_join(jef_hogar_imputed, by=c("sbj_num"="amelia_fit_imputations_imp1_sbj_num")) %>% 
    dplyr::mutate(jef_hogar=factor(dplyr::case_when(is.na(jef_hogar) & jef_hogar_yes>jef_hogar_no~"Yes",
                                                  is.na(jef_hogar) & jef_hogar_yes<jef_hogar_no~"No",
                                                  TRUE~as.character(jef_hogar)))) %>% 
  data.frame()
 
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#table(BD_2020_input3_miss2$jef_hogar,exclude = NULL)

if(nrow(BD_2020_input3_miss7)-nrow(BD_2020_input3_miss6)>0){
  warning("AGS: Some rows were added in the imputation")}


As a result of the imputations, there were no missing values.


Had not Completed Secondary School

We also imputed the single-item question whether the participant had not completed Secondary School (educacion_media), which had a small amount of missing values (n= 6).


# Ver distintos valores propuestos para sustancia de inciio
# names(amelia_fit$imputations$imp1)
educacion_media_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$sbj_num,
       amelia_fit$imputations$imp1$educacion_media,
       amelia_fit$imputations$imp2$educacion_media,
       amelia_fit$imputations$imp3$educacion_media,
       amelia_fit$imputations$imp4$educacion_media,
       amelia_fit$imputations$imp5$educacion_media,
       amelia_fit$imputations$imp6$educacion_media,
       amelia_fit$imputations$imp7$educacion_media,
       amelia_fit$imputations$imp8$educacion_media,
       amelia_fit$imputations$imp9$educacion_media,
       amelia_fit$imputations$imp10$educacion_media,
       amelia_fit$imputations$imp11$educacion_media,
       amelia_fit$imputations$imp12$educacion_media,
       amelia_fit$imputations$imp13$educacion_media,
       amelia_fit$imputations$imp14$educacion_media,
       amelia_fit$imputations$imp15$educacion_media,
       amelia_fit$imputations$imp16$educacion_media,
       amelia_fit$imputations$imp17$educacion_media,
       amelia_fit$imputations$imp18$educacion_media,
       amelia_fit$imputations$imp19$educacion_media,
       amelia_fit$imputations$imp20$educacion_media,
       amelia_fit$imputations$imp21$educacion_media,
       amelia_fit$imputations$imp22$educacion_media,
       amelia_fit$imputations$imp23$educacion_media,
       amelia_fit$imputations$imp24$educacion_media,
       amelia_fit$imputations$imp25$educacion_media,
       amelia_fit$imputations$imp26$educacion_media,
       amelia_fit$imputations$imp27$educacion_media,
       amelia_fit$imputations$imp28$educacion_media,
       amelia_fit$imputations$imp29$educacion_media,
       amelia_fit$imputations$imp30$educacion_media,
       amelia_fit$imputations$imp31$educacion_media,
       amelia_fit$imputations$imp32$educacion_media,
       amelia_fit$imputations$imp33$educacion_media,
       amelia_fit$imputations$imp34$educacion_media,
       amelia_fit$imputations$imp35$educacion_media,
       amelia_fit$imputations$imp36$educacion_media,
       amelia_fit$imputations$imp37$educacion_media,
       amelia_fit$imputations$imp38$educacion_media,
       amelia_fit$imputations$imp39$educacion_media,
       amelia_fit$imputations$imp40$educacion_media,
       amelia_fit$imputations$imp41$educacion_media,
       amelia_fit$imputations$imp42$educacion_media,
       amelia_fit$imputations$imp43$educacion_media,
       amelia_fit$imputations$imp44$educacion_media,
       amelia_fit$imputations$imp45$educacion_media,
       amelia_fit$imputations$imp46$educacion_media,
       amelia_fit$imputations$imp47$educacion_media,
       amelia_fit$imputations$imp48$educacion_media,
       amelia_fit$imputations$imp49$educacion_media,
       amelia_fit$imputations$imp50$educacion_media,
       amelia_fit$imputations$imp51$educacion_media,
       amelia_fit$imputations$imp52$educacion_media,
       amelia_fit$imputations$imp53$educacion_media,
       amelia_fit$imputations$imp54$educacion_media,
       amelia_fit$imputations$imp55$educacion_media,
       amelia_fit$imputations$imp56$educacion_media,
       amelia_fit$imputations$imp57$educacion_media,
       amelia_fit$imputations$imp58$educacion_media,
       amelia_fit$imputations$imp59$educacion_media,
       amelia_fit$imputations$imp60$educacion_media,
       amelia_fit$imputations$imp61$educacion_media
       ) %>% 
  data.frame() %>% 
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.educacion_media:amelia_fit.imputations.imp61.educacion_media),~dplyr::case_when(grepl("Yes",as.character(.))~1,TRUE~0), .names="yes_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.educacion_media:amelia_fit.imputations.imp61.educacion_media),~dplyr::case_when(grepl("No",as.character(.))~1,TRUE~0), .names="no_{col}"))%>%
  dplyr::mutate(educacion_media_yes = base::rowSums(dplyr::select(., starts_with("yes_"))))%>%
  dplyr::mutate(educacion_media_no = base::rowSums(dplyr::select(., starts_with("no_"))))%>%
  janitor::clean_names() %>% 
  dplyr::select(amelia_fit_imputations_imp1_sbj_num,educacion_media_yes,educacion_media_no)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

BD_2020_input3_miss8<-
BD_2020_input3_miss7 %>% 
   dplyr::left_join(educacion_media_imputed, by=c("sbj_num"="amelia_fit_imputations_imp1_sbj_num")) %>% 
    dplyr::mutate(educacion_media=factor(dplyr::case_when(is.na(educacion_media) & educacion_media_yes>educacion_media_no~"Yes",
                                                  is.na(educacion_media) & educacion_media_yes<educacion_media_no~"No",
                                                  TRUE~as.character(educacion_media)))) %>% 
  data.frame()
 
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#table(BD_2020_input3_miss2$educacion_media,exclude = NULL)

if(nrow(BD_2020_input3_miss8)-nrow(BD_2020_input3_miss7)>0){
  warning("AGS: Some rows were added in the imputation")}


As a result of the imputations, there were no missing values.


Fixed-term Contract

We also imputed the single-item question whether the participant had a Fixed Contract (cont_def), which had a small amount of missing values (n= 33).


# Ver distintos valores propuestos para sustancia de inciio
# names(amelia_fit$imputations$imp1)
cont_def_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$sbj_num,
       amelia_fit$imputations$imp1$cont_def,
       amelia_fit$imputations$imp2$cont_def,
       amelia_fit$imputations$imp3$cont_def,
       amelia_fit$imputations$imp4$cont_def,
       amelia_fit$imputations$imp5$cont_def,
       amelia_fit$imputations$imp6$cont_def,
       amelia_fit$imputations$imp7$cont_def,
       amelia_fit$imputations$imp8$cont_def,
       amelia_fit$imputations$imp9$cont_def,
       amelia_fit$imputations$imp10$cont_def,
       amelia_fit$imputations$imp11$cont_def,
       amelia_fit$imputations$imp12$cont_def,
       amelia_fit$imputations$imp13$cont_def,
       amelia_fit$imputations$imp14$cont_def,
       amelia_fit$imputations$imp15$cont_def,
       amelia_fit$imputations$imp16$cont_def,
       amelia_fit$imputations$imp17$cont_def,
       amelia_fit$imputations$imp18$cont_def,
       amelia_fit$imputations$imp19$cont_def,
       amelia_fit$imputations$imp20$cont_def,
       amelia_fit$imputations$imp21$cont_def,
       amelia_fit$imputations$imp22$cont_def,
       amelia_fit$imputations$imp23$cont_def,
       amelia_fit$imputations$imp24$cont_def,
       amelia_fit$imputations$imp25$cont_def,
       amelia_fit$imputations$imp26$cont_def,
       amelia_fit$imputations$imp27$cont_def,
       amelia_fit$imputations$imp28$cont_def,
       amelia_fit$imputations$imp29$cont_def,
       amelia_fit$imputations$imp30$cont_def,
       amelia_fit$imputations$imp31$cont_def,
       amelia_fit$imputations$imp32$cont_def,
       amelia_fit$imputations$imp33$cont_def,
       amelia_fit$imputations$imp34$cont_def,
       amelia_fit$imputations$imp35$cont_def,
       amelia_fit$imputations$imp36$cont_def,
       amelia_fit$imputations$imp37$cont_def,
       amelia_fit$imputations$imp38$cont_def,
       amelia_fit$imputations$imp39$cont_def,
       amelia_fit$imputations$imp40$cont_def,
       amelia_fit$imputations$imp41$cont_def,
       amelia_fit$imputations$imp42$cont_def,
       amelia_fit$imputations$imp43$cont_def,
       amelia_fit$imputations$imp44$cont_def,
       amelia_fit$imputations$imp45$cont_def,
       amelia_fit$imputations$imp46$cont_def,
       amelia_fit$imputations$imp47$cont_def,
       amelia_fit$imputations$imp48$cont_def,
       amelia_fit$imputations$imp49$cont_def,
       amelia_fit$imputations$imp50$cont_def,
       amelia_fit$imputations$imp51$cont_def,
       amelia_fit$imputations$imp52$cont_def,
       amelia_fit$imputations$imp53$cont_def,
       amelia_fit$imputations$imp54$cont_def,
       amelia_fit$imputations$imp55$cont_def,
       amelia_fit$imputations$imp56$cont_def,
       amelia_fit$imputations$imp57$cont_def,
       amelia_fit$imputations$imp58$cont_def,
       amelia_fit$imputations$imp59$cont_def,
       amelia_fit$imputations$imp60$cont_def,
       amelia_fit$imputations$imp61$cont_def
       ) %>% 
  data.frame() %>% 
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.cont_def:amelia_fit.imputations.imp61.cont_def),~dplyr::case_when(grepl("Yes",as.character(.))~1,TRUE~0), .names="yes_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.cont_def:amelia_fit.imputations.imp61.cont_def),~dplyr::case_when(grepl("No",as.character(.))~1,TRUE~0), .names="no_{col}"))%>%
  dplyr::mutate(cont_def_yes = base::rowSums(dplyr::select(., starts_with("yes_"))))%>%
  dplyr::mutate(cont_def_no = base::rowSums(dplyr::select(., starts_with("no_"))))%>%
  janitor::clean_names() %>% 
  dplyr::select(amelia_fit_imputations_imp1_sbj_num,cont_def_yes,cont_def_no)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

BD_2020_input3_miss9<-
BD_2020_input3_miss8 %>% 
   dplyr::left_join(cont_def_imputed, by=c("sbj_num"="amelia_fit_imputations_imp1_sbj_num")) %>% 
    dplyr::mutate(cont_def=factor(dplyr::case_when(is.na(cont_def) & cont_def_yes>cont_def_no~"Yes",
                                                  is.na(cont_def) & cont_def_yes<cont_def_no~"No",
                                                  TRUE~as.character(cont_def)))) %>% 
  data.frame()
 
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#table(BD_2020_input3_miss2$cont_def,exclude = NULL)

if(nrow(BD_2020_input3_miss9)-nrow(BD_2020_input3_miss8)>0){
  warning("AGS: Some rows were added in the imputation")}


As a result of the imputations, there were no missing values.


Unpaid Care Work at Home

We also imputed the single-item question whether the participant reported doing Unpaid Care Work at Home (carga_dom), which had a small amount of missing values (n= 5).


# Ver distintos valores propuestos para sustancia de inciio
# names(amelia_fit$imputations$imp1)
carga_dom_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$sbj_num,
       amelia_fit$imputations$imp1$carga_dom,
       amelia_fit$imputations$imp2$carga_dom,
       amelia_fit$imputations$imp3$carga_dom,
       amelia_fit$imputations$imp4$carga_dom,
       amelia_fit$imputations$imp5$carga_dom,
       amelia_fit$imputations$imp6$carga_dom,
       amelia_fit$imputations$imp7$carga_dom,
       amelia_fit$imputations$imp8$carga_dom,
       amelia_fit$imputations$imp9$carga_dom,
       amelia_fit$imputations$imp10$carga_dom,
       amelia_fit$imputations$imp11$carga_dom,
       amelia_fit$imputations$imp12$carga_dom,
       amelia_fit$imputations$imp13$carga_dom,
       amelia_fit$imputations$imp14$carga_dom,
       amelia_fit$imputations$imp15$carga_dom,
       amelia_fit$imputations$imp16$carga_dom,
       amelia_fit$imputations$imp17$carga_dom,
       amelia_fit$imputations$imp18$carga_dom,
       amelia_fit$imputations$imp19$carga_dom,
       amelia_fit$imputations$imp20$carga_dom,
       amelia_fit$imputations$imp21$carga_dom,
       amelia_fit$imputations$imp22$carga_dom,
       amelia_fit$imputations$imp23$carga_dom,
       amelia_fit$imputations$imp24$carga_dom,
       amelia_fit$imputations$imp25$carga_dom,
       amelia_fit$imputations$imp26$carga_dom,
       amelia_fit$imputations$imp27$carga_dom,
       amelia_fit$imputations$imp28$carga_dom,
       amelia_fit$imputations$imp29$carga_dom,
       amelia_fit$imputations$imp30$carga_dom,
       amelia_fit$imputations$imp31$carga_dom,
       amelia_fit$imputations$imp32$carga_dom,
       amelia_fit$imputations$imp33$carga_dom,
       amelia_fit$imputations$imp34$carga_dom,
       amelia_fit$imputations$imp35$carga_dom,
       amelia_fit$imputations$imp36$carga_dom,
       amelia_fit$imputations$imp37$carga_dom,
       amelia_fit$imputations$imp38$carga_dom,
       amelia_fit$imputations$imp39$carga_dom,
       amelia_fit$imputations$imp40$carga_dom,
       amelia_fit$imputations$imp41$carga_dom,
       amelia_fit$imputations$imp42$carga_dom,
       amelia_fit$imputations$imp43$carga_dom,
       amelia_fit$imputations$imp44$carga_dom,
       amelia_fit$imputations$imp45$carga_dom,
       amelia_fit$imputations$imp46$carga_dom,
       amelia_fit$imputations$imp47$carga_dom,
       amelia_fit$imputations$imp48$carga_dom,
       amelia_fit$imputations$imp49$carga_dom,
       amelia_fit$imputations$imp50$carga_dom,
       amelia_fit$imputations$imp51$carga_dom,
       amelia_fit$imputations$imp52$carga_dom,
       amelia_fit$imputations$imp53$carga_dom,
       amelia_fit$imputations$imp54$carga_dom,
       amelia_fit$imputations$imp55$carga_dom,
       amelia_fit$imputations$imp56$carga_dom,
       amelia_fit$imputations$imp57$carga_dom,
       amelia_fit$imputations$imp58$carga_dom,
       amelia_fit$imputations$imp59$carga_dom,
       amelia_fit$imputations$imp60$carga_dom,
       amelia_fit$imputations$imp61$carga_dom
       ) %>% 
  data.frame() %>% 
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.carga_dom:amelia_fit.imputations.imp61.carga_dom),~dplyr::case_when(grepl("Yes",as.character(.))~1,TRUE~0), .names="yes_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.carga_dom:amelia_fit.imputations.imp61.carga_dom),~dplyr::case_when(grepl("No",as.character(.))~1,TRUE~0), .names="no_{col}"))%>%
  dplyr::mutate(carga_dom_yes = base::rowSums(dplyr::select(., starts_with("yes_"))))%>%
  dplyr::mutate(carga_dom_no = base::rowSums(dplyr::select(., starts_with("no_"))))%>%
  janitor::clean_names() %>% 
  dplyr::select(amelia_fit_imputations_imp1_sbj_num,carga_dom_yes,carga_dom_no)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

BD_2020_input3_miss10<-
BD_2020_input3_miss9 %>% 
   dplyr::left_join(carga_dom_imputed, by=c("sbj_num"="amelia_fit_imputations_imp1_sbj_num")) %>% 
    dplyr::mutate(carga_dom=factor(dplyr::case_when(is.na(carga_dom) & carga_dom_yes>carga_dom_no~"Yes",
                                                  is.na(carga_dom) & carga_dom_yes<carga_dom_no~"No",
                                                  TRUE~as.character(carga_dom)))) %>% 
  data.frame()#%>% janitor::tabyl(carga_dom)
 
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#table(BD_2020_input3_miss2$carga_dom,exclude = NULL)

if(nrow(BD_2020_input3_miss10)-nrow(BD_2020_input3_miss9)>0){
  warning("AGS: Some rows were added in the imputation")}


As a result of the imputations, there were no missing values.


Economic Narrowness

We also imputed the single-item question whether the participant reported Economic Narrowness (a61), which had a small amount of missing values (n= 11). In case of ties, we chose the most vulnerable value.


# Ver distintos valores propuestos para sustancia de inciio
# names(amelia_fit$imputations$imp1)
a61_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$sbj_num,
       amelia_fit$imputations$imp1$a61,
       amelia_fit$imputations$imp2$a61,
       amelia_fit$imputations$imp3$a61,
       amelia_fit$imputations$imp4$a61,
       amelia_fit$imputations$imp5$a61,
       amelia_fit$imputations$imp6$a61,
       amelia_fit$imputations$imp7$a61,
       amelia_fit$imputations$imp8$a61,
       amelia_fit$imputations$imp9$a61,
       amelia_fit$imputations$imp10$a61,
       amelia_fit$imputations$imp11$a61,
       amelia_fit$imputations$imp12$a61,
       amelia_fit$imputations$imp13$a61,
       amelia_fit$imputations$imp14$a61,
       amelia_fit$imputations$imp15$a61,
       amelia_fit$imputations$imp16$a61,
       amelia_fit$imputations$imp17$a61,
       amelia_fit$imputations$imp18$a61,
       amelia_fit$imputations$imp19$a61,
       amelia_fit$imputations$imp20$a61,
       amelia_fit$imputations$imp21$a61,
       amelia_fit$imputations$imp22$a61,
       amelia_fit$imputations$imp23$a61,
       amelia_fit$imputations$imp24$a61,
       amelia_fit$imputations$imp25$a61,
       amelia_fit$imputations$imp26$a61,
       amelia_fit$imputations$imp27$a61,
       amelia_fit$imputations$imp28$a61,
       amelia_fit$imputations$imp29$a61,
       amelia_fit$imputations$imp30$a61,
       amelia_fit$imputations$imp31$a61,
       amelia_fit$imputations$imp32$a61,
       amelia_fit$imputations$imp33$a61,
       amelia_fit$imputations$imp34$a61,
       amelia_fit$imputations$imp35$a61,
       amelia_fit$imputations$imp36$a61,
       amelia_fit$imputations$imp37$a61,
       amelia_fit$imputations$imp38$a61,
       amelia_fit$imputations$imp39$a61,
       amelia_fit$imputations$imp40$a61,
       amelia_fit$imputations$imp41$a61,
       amelia_fit$imputations$imp42$a61,
       amelia_fit$imputations$imp43$a61,
       amelia_fit$imputations$imp44$a61,
       amelia_fit$imputations$imp45$a61,
       amelia_fit$imputations$imp46$a61,
       amelia_fit$imputations$imp47$a61,
       amelia_fit$imputations$imp48$a61,
       amelia_fit$imputations$imp49$a61,
       amelia_fit$imputations$imp50$a61,
       amelia_fit$imputations$imp51$a61,
       amelia_fit$imputations$imp52$a61,
       amelia_fit$imputations$imp53$a61,
       amelia_fit$imputations$imp54$a61,
       amelia_fit$imputations$imp55$a61,
       amelia_fit$imputations$imp56$a61,
       amelia_fit$imputations$imp57$a61,
       amelia_fit$imputations$imp58$a61,
       amelia_fit$imputations$imp59$a61,
       amelia_fit$imputations$imp60$a61,
       amelia_fit$imputations$imp61$a61
       ) %>% 
  data.frame() %>% 
  janitor::clean_names() %>% 
  melt(id.vars="amelia_fit_imputations_imp1_sbj_num") %>% 
  dplyr::arrange(amelia_fit_imputations_imp1_sbj_num) %>% 
  dplyr::ungroup() %>%
  dplyr::count(amelia_fit_imputations_imp1_sbj_num,value) %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_sbj_num) %>% 
  #de ahí, por cada sujeto, seleccionar los con valores más altos
  dplyr::slice_max(n, n = 1) %>% 
  #1 There is just enough money, with no greater difficulties
  #2 There is enough money, being able to save               
  #3 There is not enough money, with some difficulties       
  #4 There is not enough money, with greater difficulties  
  dplyr::summarise(enough=sum(n[value == "There is just enough money, with no greater difficulties"],na.rm=T),
                    just=sum(n[value == "There is enough money, being able to save"],na.rm=T),
                    not_enough=sum(n[value == "There is not enough money, with some difficulties"],na.rm=T),
                    not_enough_diff=sum(n[value == "There is not enough money, with greater difficulties"],na.rm=T)) %>% 
  #dplyr::filter(hay_acoso==1)
  rowwise() %>% 
  dplyr::mutate_at(.vars = vars(c("enough","just","not_enough","not_enough_diff")),
                   .funs = list(`na`=~ifelse(.>1, 1, 0))) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(ties=rowSums(dplyr::select(.,ends_with("_na")),na.rm=T),
                ties=ifelse(ties>1,1,0)) %>% 
  dplyr::ungroup() %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_sbj_num) %>% 
  #en caso que hayan más filas producto de los ties
  slice(1) %>% 
  dplyr::mutate(a61_imputation= factor(dplyr::case_when(enough>0 & ties==0~"There is just enough money, with no greater difficulties",just>0 & ties==0~"There is enough money, being able to save",not_enough>0 & ties==0~"There is not enough money, with some difficulties",not_enough_diff>0 & ties==0~"There is not enough money, with greater difficulties",TRUE~NA_character_)))
## slice (grouped): no rows removed
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#:#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

BD_2020_input3_miss11<-
BD_2020_input3_miss10 %>% 
   dplyr::left_join(a61_imputed[,c("amelia_fit_imputations_imp1_sbj_num","a61_imputation")], by=c("sbj_num"="amelia_fit_imputations_imp1_sbj_num")) %>% 
    dplyr::mutate(a61=factor(dplyr::case_when(is.na(a61)~as.character(a61_imputation),
                                                  TRUE~as.character(a61)))) %>% 
  data.frame()

#3082647 - ties
#3114975
#3115200

#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#table(BD_2020_input3_miss18$a61,exclude = NULL)

if(nrow(BD_2020_input3_miss11)-nrow(BD_2020_input3_miss10)>0){
  warning("AGS: Some rows were added in the imputation")}


As a result of the imputations, some values were not possible to impute (n=2).


Absence of Union in Primary Work

We also imputed the single-item question whether the participant reported Economic Narrowness (org_sind), which had a small amount of missing values (n= 76).


# Ver distintos valores propuestos para sustancia de inciio
# names(amelia_fit$imputations$imp1)
org_sind_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$sbj_num,
       amelia_fit$imputations$imp1$org_sind,
       amelia_fit$imputations$imp2$org_sind,
       amelia_fit$imputations$imp3$org_sind,
       amelia_fit$imputations$imp4$org_sind,
       amelia_fit$imputations$imp5$org_sind,
       amelia_fit$imputations$imp6$org_sind,
       amelia_fit$imputations$imp7$org_sind,
       amelia_fit$imputations$imp8$org_sind,
       amelia_fit$imputations$imp9$org_sind,
       amelia_fit$imputations$imp10$org_sind,
       amelia_fit$imputations$imp11$org_sind,
       amelia_fit$imputations$imp12$org_sind,
       amelia_fit$imputations$imp13$org_sind,
       amelia_fit$imputations$imp14$org_sind,
       amelia_fit$imputations$imp15$org_sind,
       amelia_fit$imputations$imp16$org_sind,
       amelia_fit$imputations$imp17$org_sind,
       amelia_fit$imputations$imp18$org_sind,
       amelia_fit$imputations$imp19$org_sind,
       amelia_fit$imputations$imp20$org_sind,
       amelia_fit$imputations$imp21$org_sind,
       amelia_fit$imputations$imp22$org_sind,
       amelia_fit$imputations$imp23$org_sind,
       amelia_fit$imputations$imp24$org_sind,
       amelia_fit$imputations$imp25$org_sind,
       amelia_fit$imputations$imp26$org_sind,
       amelia_fit$imputations$imp27$org_sind,
       amelia_fit$imputations$imp28$org_sind,
       amelia_fit$imputations$imp29$org_sind,
       amelia_fit$imputations$imp30$org_sind,
       amelia_fit$imputations$imp31$org_sind,
       amelia_fit$imputations$imp32$org_sind,
       amelia_fit$imputations$imp33$org_sind,
       amelia_fit$imputations$imp34$org_sind,
       amelia_fit$imputations$imp35$org_sind,
       amelia_fit$imputations$imp36$org_sind,
       amelia_fit$imputations$imp37$org_sind,
       amelia_fit$imputations$imp38$org_sind,
       amelia_fit$imputations$imp39$org_sind,
       amelia_fit$imputations$imp40$org_sind,
       amelia_fit$imputations$imp41$org_sind,
       amelia_fit$imputations$imp42$org_sind,
       amelia_fit$imputations$imp43$org_sind,
       amelia_fit$imputations$imp44$org_sind,
       amelia_fit$imputations$imp45$org_sind,
       amelia_fit$imputations$imp46$org_sind,
       amelia_fit$imputations$imp47$org_sind,
       amelia_fit$imputations$imp48$org_sind,
       amelia_fit$imputations$imp49$org_sind,
       amelia_fit$imputations$imp50$org_sind,
       amelia_fit$imputations$imp51$org_sind,
       amelia_fit$imputations$imp52$org_sind,
       amelia_fit$imputations$imp53$org_sind,
       amelia_fit$imputations$imp54$org_sind,
       amelia_fit$imputations$imp55$org_sind,
       amelia_fit$imputations$imp56$org_sind,
       amelia_fit$imputations$imp57$org_sind,
       amelia_fit$imputations$imp58$org_sind,
       amelia_fit$imputations$imp59$org_sind,
       amelia_fit$imputations$imp60$org_sind,
       amelia_fit$imputations$imp61$org_sind
       ) %>% 
  data.frame() %>% 
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.org_sind:amelia_fit.imputations.imp61.org_sind),~dplyr::case_when(grepl("Yes",as.character(.))~1,TRUE~0), .names="yes_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.org_sind:amelia_fit.imputations.imp61.org_sind),~dplyr::case_when(grepl("No",as.character(.))~1,TRUE~0), .names="no_{col}"))%>%
  dplyr::mutate(org_sind_yes = base::rowSums(dplyr::select(., starts_with("yes_"))))%>%
  dplyr::mutate(org_sind_no = base::rowSums(dplyr::select(., starts_with("no_"))))%>%
  janitor::clean_names() %>% 
  dplyr::select(amelia_fit_imputations_imp1_sbj_num,org_sind_yes,org_sind_no)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

BD_2020_input3_miss12<-
BD_2020_input3_miss11 %>% 
   dplyr::left_join(org_sind_imputed, by=c("sbj_num"="amelia_fit_imputations_imp1_sbj_num")) %>% 
    dplyr::mutate(org_sind=factor(dplyr::case_when(is.na(org_sind) & org_sind_yes>org_sind_no~"Yes",
                                                  is.na(org_sind) & org_sind_yes<org_sind_no~"No",
                                                  TRUE~as.character(org_sind)))) %>% 
  data.frame()
 
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#table(BD_2020_input3_miss2$org_sind,exclude = NULL)

if(nrow(BD_2020_input3_miss12)-nrow(BD_2020_input3_miss11)>0){
  warning("AGS: Some rows were added in the imputation")}


As a result of the imputations, there were no missing values.


Sample Characteristics

We checked the characteristics of the sample depending on type of treatment (Residential or Outpatients).


#prop.table(table(CONS_C1_df_dup_SEP_2020_match$abandono_temprano_rec,CONS_C1_df_dup_SEP_2020_match$tipo_de_plan_res),2)

#añado los imputados
BD_2020_input3_match_not_miss<-
BD_2020_input3 %>% 
  dplyr::select(-c(unlist(miss_vars$variable))) %>% 
  dplyr::left_join(dplyr::select(BD_2020_input3_miss12,
                                 c(unlist(miss_vars$variable),"sbj_num")),by="sbj_num") %>% 
  dplyr::arrange(sbj_num) %>% 
  dplyr::mutate(sum_miss = base::rowSums(is.na(dplyr::select(.,unlist(miss_vars$variable))))) %>% 
  dplyr::group_by(sbj_num) %>% 
  dplyr::mutate(sum_miss=sum(sum_miss)) %>% 
  dplyr::ungroup() 
#table(BD_2020_input3_match_not_miss$sum_miss)

BD_2020_input3_match_not_miss_after_imp_descartados <-
  BD_2020_input3_match_not_miss %>% 
  dplyr::filter(sum_miss>0) %>% 
  dplyr::select(unlist(miss_vars$variable))

BD_2020_input3_match_not_miss_after_imp_conservados <-
  BD_2020_input3_match_not_miss %>% 
  dplyr::filter(sum_miss==0) %>% 
  dplyr::select(-sum_miss)

#BD_2020_input3_match_not_miss2<-
#  BD_2020_input3_match_not_miss[complete.cases(BD_2020_input3_match_not_miss[,..match.on_tot]),..match.on_tot] 

#if(nrow(BD_2020_input3_match_not_miss_after_imp_descartados)>0){warning("There are still missing values")}


Considering that some missing values were not able to imputation (due to ties in the candidate values for imputation or inconsistent values for imputations (n=2,users=0), we ended the process having 1,993 complete cases (users= 0).


kableone <- function(x, ...) {
  capture.output(x <- print(x,...))
  knitr::kable(x,
               format= "html", 
               col.names=c("Men","Women","p-value","test","SMD"),
               caption = 'Table 4. Summary Tables of Variables of Interest, Sex, after imputation',
               format.args= list(decimal.mark= ".", big.mark= ","))
}

cat_vars2<-
c("jef_hogar", "carga_dom", "educacion_media", "ocuprev_nocalif",
   "org_sind", "tamano_de_la_empresa", "sect_priv", 
  "atpub", "superv", "turnos", "antiguedad", "cont_def", 
  "gse", "a61", "edad", "zona")

for (i in 1:length(c(cat_vars2,"a52",
                       #candidates for analyses
"naq_sum","lid_des_sum","la_leyman","lid_des_dic","naq_dic", "naq_leyman","lid_des_leyman","naq_my_e","araucaria_acoso", "autonaq"
                       ))){
    x<-c(cat_vars2,"a52",
    "naq_sum","lid_des_sum","la_leyman","lid_des_dic","naq_dic", "naq_leyman","lid_des_leyman","naq_my_e","araucaria_acoso", "autonaq"     
         )[i]
    attr(BD_2020_input3_match_not_miss[[x]],"label")<-attr(BD_2020_input3[[x]],"label")
}

dsgn_BD_not_miss <- survey::svydesign(ids = ~1, 
                                      data = BD_2020_input3_match_not_miss,
                                      weights = ~weight)

dsgn_mujer_not_miss<- subset(dsgn_BD_not_miss,mujer==1)
dsgn_hombre_not_miss<- subset(dsgn_BD_not_miss,mujer==0)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

tab3<-
   svyCreateTableOne(vars = cat_vars2, 
                     data= dsgn_BD_not_miss, 
                     factorVars=cat_vars2,
                     strata = "a52", 
                     includeNA =T,
                      smd=T)

kableone(tab3, 
                       test=T, varLabels=T,noSpaces=T, printToggle=T, dropEqual=F,smd=T) %>% 
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 10) %>%
    kableExtra::row_spec(1, bold = T) %>%
    kableExtra::add_footnote(paste0("Note. Total cases (n= ",nrow(BD_2020_input3),"); NA= Missing values; SMD=Standardized mean difference;\nPercentages for categorical and ordinal variables"), notation = "none") %>% 
    kableExtra::scroll_box(width = "100%", height = "400px")
Table 4. Summary Tables of Variables of Interest, Sex, after imputation
Men Women p-value test SMD
n 1191.6 804.3
Head of Household (1=Yes) = Yes (%) 979.9 (82.2) 453.5 (56.4) <0.001 0.584
Unpaid Care Work at Home (A16) = Yes (%) 806.0 (67.6) 719.4 (89.5) <0.001 0.551
Had not Completed Secondary School (Art. Magdalena) = Yes (%) 285.5 (24.0) 142.3 (17.7) 0.002 0.155
Unskilled Workers (Plant and Machine Operators, Technicians, Transport Laborers & Unskilled Workers vs. Clerks, Sales Workers, Skilled Service Workers, Managers & Professionals) (Art. Magdalena) = Yes (%) 670.2 (56.2) 328.7 (40.9) <0.001 0.311
Absence of Union in Primary Work = Yes (%) 772.3 (64.8) 506.6 (63.0) 0.451 0.038
Size of the Workplace (Adimark) (%) 0.044 0.145
1 to 10 371.6 (31.2) 273.5 (34.0)
11 to 49 289.7 (24.3) 176.9 (22.0)
50 to 199 241.3 (20.2) 194.6 (24.2)
200 or more 289.1 (24.3) 159.2 (19.8)
Sector of Workplace (1=Private) = Public sector (%) 144.2 (12.1) 166.3 (20.7) <0.001 0.233
Customer service job = Job w/o customer service (%) 476.3 (40.0) 214.3 (26.6) <0.001 0.286
Supervises the tasks of other workers = Yes (%) 421.9 (35.4) 210.7 (26.2) <0.001 0.200
System of shifts = Yes (%) 273.6 (23.0) 96.7 (12.0) <0.001 0.291
Seniority in Workplace = Six months or less (%) 146.1 (12.3) 87.9 (10.9) 0.396 0.042
Fixed-term Contract (Art. Magdalena) = Yes (%) 353.4 (29.7) 221.7 (27.6) 0.353 0.046
GSE: Socioeconomic Groups (%) 0.001 0.226
ABC1 51.8 (4.3) 31.2 (3.9)
C2 333.9 (28.0) 305.3 (38.0)
C3 460.0 (38.6) 265.0 (32.9)
D 315.8 (26.5) 176.2 (21.9)
E 30.1 (2.5) 26.6 (3.3)
Economic Narrowness (%) NA 0.212
There is enough money, being able to save 217.7 (18.3) 124.1 (15.4)
There is just enough money, with no greater difficulties 678.1 (56.9) 406.0 (50.5)
There is not enough money, with greater difficulties 59.8 (5.0) 65.4 (8.1)
There is not enough money, with some difficulties 235.4 (19.8) 207.4 (25.8)
NA 0.7 (0.1) 1.4 (0.2)
Age (in groups) (%) <0.001 0.261
20 to 29 251.8 (21.1) 147.7 (18.4)
30 to 39 273.1 (22.9) 199.9 (24.9)
40 to 49 252.5 (21.2) 221.5 (27.5)
50 to 59 237.5 (19.9) 172.4 (21.4)
>=60 176.7 (14.8) 62.7 (7.8)
Metropolitan Region (%) 0.518 0.045
Santiago 767.3 (64.4) 530.9 (66.0)
Valparaíso 199.7 (16.8) 135.3 (16.8)
Concepción 224.6 (18.8) 138.0 (17.2)
Note. Total cases (n= 1995); NA= Missing values; SMD=Standardized mean difference;
Percentages for categorical and ordinal variables
#"tipo_de_plan_ambulatorio",
#https://cran.r-project.org/web/packages/tableone/vignettes/smd.html
#http://rstudio-pubs-static.s3.amazonaws.com/405765_2ce448f9bde24148a5f94c535a34b70e.html
#https://cran.r-project.org/web/packages/tableone/vignettes/introduction.html
#https://cran.r-project.org/web/packages/tableone/tableone.pdf
#https://www.rdocumentation.org/packages/tableone/versions/0.12.0/topics/CreateTableOne

## Construct a table 
#standardized mean differences of greater than 0.1

Match

#a61_imputed %>% dplyr::filter(ties==1)

BD_2020_input3_match = BD_2020_input3_match_not_miss%>% 
                        #ordenar por la variable de interés
                        dplyr::arrange(desc(a52)) %>% 
                        dplyr::select(c("sbj_num",cat_vars2,"a52")) %>% 
                        data.table::data.table()

BD_2020_input3_match$a52<-ifelse(BD_2020_input3_match$a52=="Women",1,0)

BD_2020_input3_match[!complete.cases(BD_2020_input3_match), ]
##    sbj_num jef_hogar carga_dom educacion_media ocuprev_nocalif org_sind
## 1: 3152186        No       Yes              No              No      Yes
## 2: 3114976        No       Yes             Yes             Yes      Yes
##    tamano_de_la_empresa      sect_priv                atpub superv turnos
## 1:            50 to 199 Private sector Customer service job    Yes     No
## 2:              1 to 10 Private sector Customer service job    Yes     No
##              antiguedad cont_def gse  a61     edad       zona a52
## 1: More than six months       No  C2 <NA> 40 to 49   Santiago   1
## 2: More than six months      Yes   D <NA> 40 to 49 Valparaíso   0
if( any(is.na(BD_2020_input3_match)) ) warning(paste0("There are still missing cases in the data base. Considering that, it is not possible to keep doing the matching unless ",nrow(BD_2020_input3_match[!complete.cases(BD_2020_input3_match),])," row(s) are deleted"))
## Warning: There are still missing cases in the data base. Considering that, it is
## not possible to keep doing the matching unless 2 row(s) are deleted
BD_2020_input3_match<- BD_2020_input3_match[complete.cases(BD_2020_input3_match), ]

for (i in 1:c(length(c(cat_vars2,"a52")))){
    x<-c(cat_vars2,"a52")[i]
    attr(BD_2020_input3_match[[x]],"label")<-attr(BD_2020_input3[[x]],"label")
}

no_mostrar=0
if(no_mostrar==1){
  for (i in 1:c(length(c(cat_vars2,"a52")))){
      x<-c(cat_vars2,"a52")[i]
      print(table(BD_2020_input3_match[[x]]))
  }
}

library(designmatch)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
# 1. Gurobi installation

#For an exact solution, we strongly recommend running designmatch either with CPLEX or Gurobi.  Between these two solvers, the R interface of Gurobi is considerably easier to install.  Here we provide general instructions for manually installing Gurobi and its R interface in Mac and Windows machines.

#1. Create a free academic license
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/creating_a_new_academic_li.html

#2. Install the software
#   2.1. In http://www.gurobi.com/index, go to Downloads > Gurobi Software
#   2.2. Choose your operating system and press download
#
#3. Retrieve and set up your Gurobi license
#   2.1. Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_and_setting_up_.html
#   2.2. Then follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_a_free_academic.html
#
#4. Test your license
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/testing_your_license.html
#
#5. Install the R interface of Gurobi   
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/r_installing_the_r_package.html
#   * In Windows, in R run the command install.packages("PATH\\gurobi_7.X-Y.zip", repos=NULL) where path leads to the file gurobi_7.X-Y.zip (for example PATH=C:\\gurobi702\\win64\\R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#   * In MAC, in R run the command install.packages('PATH/gurobi_7.X-Y.tgz', repos=NULL) where path leads to the file gurobi_7.X-Y.tgz (for example PATH=/Library/gurobi702/mac64/R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#       
#6. Test the installation 
#   Load the library and run the examples therein
#   * A possible error that you may get is the following: "Error: package ‘slam’ required by ‘gurobi’ could not be found". If that case, install.packages('slam') and try again.
#   You should be all set!

#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:

#Indicador de tratamiento
t_ind= as.numeric(unlist(data.table::data.table(BD_2020_input3_match$a52)))

# Moment balance: constrain differences in means to be at most 0.1 standard deviations apart
#:#:#:#:#:#:#:#:#:#:#:#:#:
#######mom_covs is a matrix where each column is a covariate whose mean is to be balanced
#######mom_tols is a vector of tolerances for the maximum difference in means for the covariates in mom_covs
#######mom_targets is a vector of target moments (e.g., means) of a distribution to be approximated by matched sampling. is optional, but if #######mom_covs is specified then mom_tols needs to be specified too
#######The lengths of mom_tols and mom_target have to be equal to the number of columns of mom_covs
#mom_covs = cbind(as.numeric(BD_2020_input3_match$a53))
#mom_tols = absstddif(mom_covs, t_ind, .2)
#mom = list(covs = mom_covs, tols = mom_tols, targets = NULL)

# Fine balance
#is a matrix where each column is a nominal covariate for fine balance
fine_covs = cbind(BD_2020_input3_match$jef_hogar, 
                 BD_2020_input3_match$carga_dom, 
                 BD_2020_input3_match$educacion_media, 
                 BD_2020_input3_match$ocuprev_nocalif, 
                 BD_2020_input3_match$org_sind,
                 BD_2020_input3_match$tamano_de_la_empresa,
                 BD_2020_input3_match$sect_priv,
                 BD_2020_input3_match$atpub,
                 BD_2020_input3_match$superv,
                 BD_2020_input3_match$turnos,
                 BD_2020_input3_match$antiguedad,
                 BD_2020_input3_match$cont_def,
                 BD_2020_input3_match$gse,
                 BD_2020_input3_match$a61,
                 BD_2020_input3_match$edad,
                 BD_2020_input3_match$zona)
fine = list(covs = fine_covs)

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

require(slam)
# Solver options
#default solver is glpk with approximate = 1
#For an exact solution, we strongly recommend using cplex or gurobi as they are much faster than the other solvers, but they do require a license (free for academics, but not for people outside universities)
t_max = 60*60*6
solver = list(name = "gurobi", #cplex, glpk, gurobi and symphony
              t_max = t_max, #t_max is a scalar with the maximum time limit for finding the matches.within this time limit, a partial, suboptimal solution is given
              approximate = 0,#. If approximate = 1 (the default), an approximate solution is found via a relaxation of the original integer program
              round_cplex = 0, 
              trace = 0)#turns the optimizer output on
library(gurobi)

#MATCH
start.time <- Sys.time()
set.seed(2125)
out = designmatch::bmatch(t_ind, #ES NECESARIO QUE LOS TRATAMIENTOS ESTEN ORDENADOS Y LOS OTROS VECTORES TAMBIËN 
                #mom = mom,# ya los definí list(covs = mom_covs, tols = mom_tols, targets = mom_targets), 
                fine = fine, 
                solver = solver,
                n_controls= 1)
##   Building the matching problem... 
##   Gurobi optimizer is open... 
##   Finding the optimal matches... 
##   Optimal matches found
end.time <- Sys.time()
time.taken <- end.time - start.time

paste0("Time taken in process:");time.taken
## [1] "Time taken in process:"
## Time difference of 2.139692 hours
# Fine balance (note here we are getting an approximate solution)
#for (i in 1:ncol(fine_covs)) {     
#   print(finetab(fine_covs[, i], t_id_1, c_id_1))
#}
# Indices of the treated units and matched controls
t_id_1 = out$t_id  
c_id_1 = out$c_id   

paste0("No. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
## [1] "No. of treatments: 631; No. of controls: 631"
d_match = BD_2020_input3_match[c(t_id_1, c_id_1), ]
#cuidado, el anterior me encontró más del mismo control para un tratado
#por eso ocuparé el de más abajo


Explore Results of the Matching


BD_2020_input3_match_lab<-BD_2020_input3_match[,..cat_vars2]
X_mat<-matrix()
for (i in 1:length(cat_vars2)){
  x<-cat_vars2[i]
X_mat<-c(X_mat,
             stringr::str_replace(stringi::stri_paste(paste(stringi::stri_wrap(as.character(attr(BD_2020_input3_match_lab[[x]],"label")), width =20), "\n"), collapse=''),"\n$","")
  )
}

X_mat_lab<-cbind(
            "Head of Household"=BD_2020_input3_match_lab$jef_hogar, 
            "Unpaid Care Work at Home"=BD_2020_input3_match_lab$carga_dom,
            "Incomplete Secondary School"=BD_2020_input3_match_lab$educacion_media,
            "Unskilled Workers"=BD_2020_input3_match_lab$ocuprev_nocalif,
            "Size of the Workplace"=BD_2020_input3_match_lab$tamano_de_la_empresa,
            "Sector of the Workplace"=BD_2020_input3_match_lab$sect_priv, 
            "Customer service"=BD_2020_input3_match_lab$atpub,
            "Supervises"=BD_2020_input3_match_lab$superv,
            "System of shifts"=BD_2020_input3_match_lab$turnos,
            "Seniority in the Workplace"=BD_2020_input3_match_lab$antiguedad,
            "Fixed Contract"=BD_2020_input3_match_lab$cont_def,
            "GSE: Socioeconomic Groups"=BD_2020_input3_match_lab$gse,
            "Economic Narrowness"=BD_2020_input3_match_lab$a61,
            "Age (in groups)"=BD_2020_input3_match_lab$edad,
            "Metropolitan Region"=BD_2020_input3_match_lab$zona,
             "Union in the Workplace"=BD_2020_input3_match_lab$org_sind
            )

vline = 0.1
loveplot(X_mat=X_mat_lab, t_id=t_id_1, c_id=c_id_1, v_line=vline, legend_position = "topright")

for (i in c(1:length(BD_2020_input3_match))){
#print(attr(BD_2020_input3_match[[i]],"label"))
}


Balance

for (i in 1:c(length(d_match[,-1]))){
    x<-names(d_match[,-1])[i]
    attr(d_match[[x]],"label")<-attr(BD_2020_input3[[x]],"label")
}

options(knitr.kable.NA = '')
covs0_unmatch <- subset(BD_2020_input3_match, select = -c(sbj_num, a52))#subset=,
covs0_match <- subset(d_match, select = -c(sbj_num, a52))#subset=,
#_#_#_#_#_#_generar pareamientos prepost#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
library(cobalt)
bal1_nomatch<-bal.tab(covs0_unmatch, treat = BD_2020_input3_match$a52,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))

bal1_nomatch$Balance[,2]<-round(bal1_nomatch$Balance[,2],2)
bal1_nomatch$Balance[,4]<-round(bal1_nomatch$Balance[,4],2)
bal1_nomatch$Balance[,6]<-round(bal1_nomatch$Balance[,6],2)

bal1_match<-cobalt::bal.tab(covs0_match, treat = d_match$a52,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))

bal1_match$Balance[,2]<-round(bal1_match$Balance[,2],2)
bal1_match$Balance[,4]<-round(bal1_match$Balance[,4],2)
bal1_match$Balance[,6]<-round(bal1_match$Balance[,6],2)

#df_lab2<-data.frame()
#for (i in 1:c(length(names(d_match)))){
#  x<-names(d_match)[i]
#df_lab2<- rbind(df_lab2,cbind(x,attr(d_match[[x]],"label")))
#}

#GENERACION DE COMPARACIONES ENTRE GRUPOS, UNA VEZ PAREADO Y DESPUÉS

var_names<- 
  list("autonaq_Yes"="Self-label WB",
    "frec_acos21_1_No"="Self-label Frequent WB- No",
    "frec_acos21_1_Yes"="Self-label Frequent WB- Yes",
    "frec_acos21_1_Not Apply"="Self-label Frequent WB- Not Apply",
    "a22_No"= "Psych Harassment- No",
    "a22_Yes"="Psych Harassment- Yes",
    "a22_Not Apply"= "Psych Harassment- Not Apply",
    "a24_No"="Phys Harassment- No",
    "a24_Yes"="Phys Harassment- Yes",
    "a24_Not Apply"="Phys Harassment- Not Apply",
    "a26_No"="Sexual Harassment- No",
    "a26_Yes"="Sexual Harassment- Yes",
    "a26_Not Apply"="Sexual Harassment- Not Apply",
    "jerarquia_acoso_tot_comp_Yes"= "Harass-Peers",
    "jerarquia_acoso_tot_sup_Yes"= "Harass-Superiors",
    "jerarquia_acoso_tot_sub_Yes"="Harass-Subordinates",
    "jerarquia_acoso_tot_ext_Yes"="Harass-External",
    "climasex_Yes"="Sexist Climate- Yes",
    "latdec_dic_Yes"="Low Decisional Latitude- Yes",
    "aposoc_tot_Yes"="Low Social Support- Yes",
    "democ_dic_Yes"= "High Job Demands",
    "desbalance_dic_Yes"="Effort-Reward Imb",
    "salgen_Yes"="Bad Overall Health",
    "depre_Yes"="Depressive Symptoms",
    "psicotrop_Yes"="Psychotropic Consumption", 
    "auditc_2"="Risky drinking",
    "superv_Yes"="Supervisor",
    "superv_No"="Not a Supervisor",
    "jef_hogar_Yes"="Head of Household", #
    "jef_hogar_No"="Not the Head of Household", #
    "educacion_media_Yes"="Incomplete Sec School", #
    "educacion_media_No"="Complete Sec School",
    "cont_def_Yes"="Fixed-term Contract", #
    "cont_def_No"="Indefinite Contract",
    "ocuprev_nocalif_Yes"="Unskilled Work", #
    "ocuprev_nocalif_No"="Skilled Work",
    "carga_dom_Yes"="Unpaid Work At Home", #
    "carga_dom_No"="No Unpaid Work At Home", #
    "a61_There is enough money, being able to save"="Ec Narrowness- Enough money",
    "a61_There is just enough money, with no greater difficulties"="Ec Narrowness- Just money",
    "a61_There is not enough money, with some difficulties"="Ec Narrowness- Not Enough money, some...",
    "a61_There is not enough money, with greater difficulties"="Ec Narrowness- Not Enough money, greater...",
    "org_sind_Yes"="Absence of Union in the Workplace", #
    "org_sind_No"="Union in the Workplace",
    "zona_Santiago"="Region- Great Santiago",
    "zona_Valparaíso"="Region- Great Valparaíso",
    "zona_Concepción"="Region- Great Concepción",
    "a53"="Age",
    "tamano_de_la_empresa_1 to 10"="Size of Workplace-1 to 10", #
    "tamano_de_la_empresa_11 to 49"="Size of Workplace-11 to 49", #
    "tamano_de_la_empresa_50 to 199"="Size of Workplace-50 to 199", #
    "tamano_de_la_empresa_200 or more"="Size of Workplace-200 or more", #
    "sect_priv_Public sector"="Public Sector",
    "sect_priv_Private sector"="Private Sector",
    "edad_20 to 29"= "Age (in groups)- 20 to 29",
    "edad_30 to 39"= "Age (in groups)- 30 to 39",
    "edad_40 to 49"= "Age (in groups)- 40 to 49",
    "edad_50 to 59"= "Age (in groups)- 50 to 59",
    "edad_>=60"= "Age (in groups)- >=60",
    "gse_ABC1"= "GSE: Socioeconomic Groups-ABC1",
    "gse_C2"="GSE: Socioeconomic Groups-C2",
    "gse_C3"="GSE: Socioeconomic Groups-C3",
    "gse_D"="GSE: Socioeconomic Groups-D",
    "gse_E"="GSE: Socioeconomic Groups-E",
    "antiguedad_Six months or less"="Seniority- 6 months or less in workplace",
    "antiguedad_More than six months"="Seniority- >6 months in workplace",
    "turnos_Yes"= "System of shifts",
    "turnos_No"="No system of shifts",
    "atpub_Job w/o customer service"="Does not work in customer service",
    "atpub_Customer service job"="Works in Customer service"
    )

var.names<-data.table(data.frame(unlist(var_names)),keep.rownames = T) %>% janitor::clean_names()

pre_matched_matched<-
data.table::data.table(bal1_nomatch$Balance[,1:6],keep.rownames = T) %>%
  dplyr::arrange(-abs(Diff.Un)) %>% 
  dplyr::left_join(data.table::data.table(bal1_match$Balance[,2:6],keep.rownames = T),by="rn") %>% 
  dplyr::left_join(var.names,by="rn") %>% 
  dplyr::select(unlist_var_names,everything()) %>% 
  dplyr::select(-rn) 

#data.table::data.table(bal1_nomatch$Balance,keep.rownames = T)[,1]

pre_matched_matched%>% 
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 5. Covariate Balance in the Variables of Interest"),
               col.names = c("Variables","Nature of Variables", "SMDs","Threshold","Variance Ratios","Threshold","KS","SMDs","Threshold","Variance Ratios","Threshold","KS"),
               align =c('l',rep('c', 101))) %>%
  kableExtra::add_header_above(c(" "," ","Unadjusted" = 5, "Adjusted" = 5)) %>% 
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 10) %>%
  kableExtra::add_footnote( c(paste("Note.",paste0("Unadjusted (n=",dim(covs0_unmatch)[1] %>% format(big.mark=","),")"),";",paste0("Adjusted (n=",dim(covs0_match)[1] %>% format(big.mark=","),")"),";",paste0("Total pairs: ",length(c_id_1) %>% format(big.mark=",")))), 
                            notation = "none") %>%
  kableExtra::scroll_box(width = "100%", height = "375px") 
Table 5. Covariate Balance in the Variables of Interest
Unadjusted
Adjusted
Variables Nature of Variables SMDs Threshold Variance Ratios Threshold KS SMDs Threshold Variance Ratios Threshold KS
Unpaid Work At Home Binary 0.59 Not Balanced, >0.1 0.24 0.00 Balanced, <0.1 0.00
Head of Household Binary -0.57 Not Balanced, >0.1 0.25 0.02 Balanced, <0.1 0.01
Does not work in customer service Binary -0.34 Not Balanced, >0.1 0.16 0.00 Balanced, <0.1 0.00
Unskilled Work Binary -0.33 Not Balanced, >0.1 0.16 0.01 Balanced, <0.1 0.00
System of shifts Binary -0.29 Not Balanced, >0.1 0.11 0.01 Balanced, <0.1 0.00
Public Sector Binary 0.27 Not Balanced, >0.1 0.10 0.00 Balanced, <0.1 0.00
Age (in groups)- >=60 Binary -0.26 Not Balanced, >0.1 0.08 0.00 Balanced, <0.1 0.00
GSE: Socioeconomic Groups-C2 Binary 0.23 Not Balanced, >0.1 0.11 -0.01 Balanced, <0.1 0.00
Supervisor Binary -0.21 Not Balanced, >0.1 0.10 0.00 Balanced, <0.1 0.00
Incomplete Sec School Binary -0.17 Not Balanced, >0.1 0.07 0.00 Balanced, <0.1 0.00
GSE: Socioeconomic Groups-D Binary -0.14 Not Balanced, >0.1 0.06 0.01 Balanced, <0.1 0.00
Ec Narrowness- Not Enough money, some… Binary 0.14 Not Balanced, >0.1 0.06 0.00 Balanced, <0.1 0.00
Region- Great Concepción Binary 0.13 Not Balanced, >0.1 0.06 0.00 Balanced, <0.1 0.00
Size of Workplace-200 or more Binary -0.12 Not Balanced, >0.1 0.05 0.00 Balanced, <0.1 0.00
Age (in groups)- 40 to 49 Binary 0.12 Not Balanced, >0.1 0.05 0.00 Balanced, <0.1 0.00
Region- Great Santiago Binary -0.11 Not Balanced, >0.1 0.05 0.00 Balanced, <0.1 0.00
Ec Narrowness- Not Enough money, greater… Binary 0.10 Balanced, <0.1 0.02 -0.01 Balanced, <0.1 0.00
Size of Workplace-1 to 10 Binary 0.09 Balanced, <0.1 0.04 0.00 Balanced, <0.1 0.00
GSE: Socioeconomic Groups-C3 Binary -0.09 Balanced, <0.1 0.04 0.00 Balanced, <0.1 0.00
Ec Narrowness- Enough money Binary -0.09 Balanced, <0.1 0.03 0.00 Balanced, <0.1 0.00
Ec Narrowness- Just money Binary -0.09 Balanced, <0.1 0.05 0.00 Balanced, <0.1 0.00
GSE: Socioeconomic Groups-ABC1 Binary -0.07 Balanced, <0.1 0.01 0.01 Balanced, <0.1 0.00
Age (in groups)- 30 to 39 Binary 0.07 Balanced, <0.1 0.03 -0.01 Balanced, <0.1 0.00
GSE: Socioeconomic Groups-E Binary 0.05 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Size of Workplace-50 to 199 Binary 0.04 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Fixed-term Contract Binary -0.04 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Age (in groups)- 20 to 29 Binary -0.04 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Size of Workplace-11 to 49 Binary -0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Seniority- 6 months or less in workplace Binary -0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Age (in groups)- 50 to 59 Binary 0.02 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region- Great Valparaíso Binary -0.02 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Absence of Union in the Workplace Binary 0.00 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Note. Unadjusted (n=1,993) ; Adjusted (n=1,262) ; Total pairs: 631


columna_dummy <- function(df, columna) {
  df %>% 
  mutate_at(columna, ~paste(columna, eval(as.symbol(columna)), sep = "_")) %>% 
    mutate(valor = 1) %>% 
    spread(key = columna, value = valor, fill = 0)
}
BD_2020_input3_match_dum<-BD_2020_input3_match
for (i in c(1:length(cat_vars2))){#catVars[-10] excluding treatment indicator
  cat<-as.character(cat_vars2[i])#catVars[-10] excluding treatment indicator
  BD_2020_input3_match_dum<-columna_dummy(BD_2020_input3_match_dum,cat)
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

CONS_SEP_match_dum = data.table::data.table(BD_2020_input3_match_dum %>% 
                                            #dplyr::arrange(factor(sbj_num, levels = d_match$sbj_num)))
                                            dplyr::filter(sbj_num %in% unlist(d_match$sbj_num)))

#covs0_match_disc<-data.table::data.table(covs0_match_disc)
covs_dum_unmatch <- subset(BD_2020_input3_match_dum,select= -c(sbj_num))#subset=,
covs_dum_match <- subset(CONS_SEP_match_dum,select= -c(sbj_num))#subset=, , frec_acos21_1

#############################################
# Standardized differences before matching
#############################################
smd_internal<-function(var,db, tr){
  #db<-as.data.frame(db)
  #var= vector of vars to check balance
  #db= database in character format
  #tr=character format of the treatment variable 
  smd_vec<-round(abs(mean(unlist(get(db)[which(get(db)[[tr]]==1),..var]),na.rm=F)- mean(unlist(get(db)[which(get(db)[[tr]]==0),..var]),na.rm=F))/
          sqrt(((sd(unlist(get(db)[which(get(db)[[tr]]==1),..var]),na.rm=F))^2 + (sd(unlist(get(db)[which(get(db)[[tr]]==0),..var]),na.rm=F))^2 )/2),2)
#return(assign(paste0(var,"_smd"),smd_vec,envir=.GlobalEnv))
  return(print(smd_vec))
}

smd_df<-data.frame()
for (i in 1:length(covs_dum_unmatch)){
  smd_df<-rbind(smd_df,cbind.data.frame(names(covs_dum_unmatch)[i],
                             smd_internal(names(covs_dum_unmatch)[i],"covs_dum_unmatch","a52")))
}
smd_df<-
smd_df %>% 
  dplyr::rename("vars"= !!names(.[1]),"smd"= !!names(.[2])) %>% 
  dplyr::filter(!grepl("a52",vars))%>% 
  dplyr::arrange(desc(smd))
##################################################
# Standardized differences after matching
##################################################

smd_df2<-data.frame()
for (i in 1:length(names(covs_dum_match))){
  smd_df2<-rbind(smd_df2,cbind.data.frame(names(covs_dum_match)[i],
                             smd_internal(names(covs_dum_match)[i],"covs_dum_match","a52")))
}
smd_df2<-
  smd_df2 %>% 
  dplyr::rename("vars"= !!names(.[1]),"smd"= !!names(.[2])) %>% 
  dplyr::filter(!grepl("a52",vars))

smd_df_prev_match_after_match<-
  smd_df %>% 
  dplyr::left_join(smd_df2,by="vars") %>% 
  dplyr::rename("Before\nMatching"= !!names(.[2]),"After\nMatching"= !!names(.[3])) %>% 
  melt() %>% 
  dplyr::left_join(var.names,by=c("vars"="rn")) %>% 
                     dplyr::select(unlist_var_names,everything())
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Data set with stand. diff. before post matching

# Generate plot
loveplot_bal3 = ggplot(data = smd_df_prev_match_after_match, mapping = aes(x = reorder(unlist_var_names, value), y = value,
                                                    group = variable, color = variable)) +
  geom_hline(yintercept = 0.1, color = "black", size = 0.2, linetype="dashed") +
  geom_hline(yintercept = 0.2, color = "black", size = 0.2, linetype="longdash") +
  coord_flip() +
  theme_bw() + 
  theme(legend.key = element_blank()) +
  labs(y = "Average absolute standardized differences in means",x = "",color="",shape="") +
  scale_y_continuous(limits = c(0,max(smd_df_prev_match_after_match$value)+.1), breaks = seq(0,1.5,by=.1)) +
  scale_color_manual(values=c("gray20","black")) + 
  geom_point(aes(shape=variable),size=4) +
  scale_shape_manual(values=c(8,16)) + 
  theme(text = element_text(size=13))

loveplot_bal3
Figure 5. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 5. Love plot of the Matched Sample in Covariates v/s Unmatched Sample


kableone <- function(x, ...) {
  capture.output(x <- print(x,...))
  knitr::kable(x,
               format= "html", 
               col.names=c("Men","Women","p-value","test","SMD"),
               caption = 'Table 6. Summary Tables of Variables of Interest by Sex',
               format.args= list(decimal.mark= ".", big.mark= ","))
}
#c("naq_sum","lid_des_sum","la_leyman","lid_des_dic","naq_dic", "naq_leyman","lid_des_leyman","naq_my_e","araucaria_acoso", "autonaq")

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
matched_groups<-
cbind.data.frame(treated=BD_2020_input3_match[t_id_1,sbj_num],control=BD_2020_input3_match[c_id_1,sbj_num],
                 rn=1:length(BD_2020_input3_match[c_id_1,sbj_num]))

total_sample<-BD_2020_input3
matched_sample1<-d_match %>% 
                  dplyr::left_join(dplyr::select(BD_2020_input3,sbj_num,weight,
                  #add dependent variables                               
                  auditc, naq_my_e
                  ),by="sbj_num") %>% 
                  dplyr::mutate(auditc=ifelse(auditc=="Yes",1,0)) %>% 
  dplyr::left_join(matched_groups[,c("treated","rn")],by=c("sbj_num"="treated")) %>% 
  dplyr::left_join(matched_groups[,c("control","rn")],by=c("sbj_num"="control")) %>% 
  dplyr::mutate(group_num=ifelse(is.na(rn.x),rn.y,rn.x)) %>% 
  dplyr::select(-rn.x,-rn.y) %>% 
  dplyr::mutate(a52=ifelse(a52==1,"Women","Men"))

attr(matched_sample1$auditc,"label")<-"Risky drinking (Short AUDIT)"

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

#total_sample$k2_elevado<-car::recode(ifelse(total_sample$k2_elevado==2,1,0), "1='1. High Psychological Distress'; 0='0.Low Psychological Distress'")
#matched_sample1$k2_elevado<-car::recode(matched_sample1$k2_elevado,"1='1. High Psychological Distress'; 0='0.Low Psychological Distress'")

#matched_sample1$k2_elevado_num <-ifelse(matched_sample1$k6_2>6,1,0)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
dsgn_matched_sample1 <- survey::svydesign(ids = ~1, data = matched_sample1, weights = ~weight)

dsgn_matched_mujer<- subset(dsgn_matched_sample1,a52=="Women")
dsgn_matched_hombre<- subset(dsgn_matched_sample1,a52=="Men")


dsgn_BD_not_miss <- survey::svydesign(ids = ~1, 
                                      data = BD_2020_input3_match_not_miss %>%  dplyr::mutate(auditc=ifelse(auditc=="Yes",1,0)),
                                      weights = ~weight)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

#for (i in 1:c(length(c(tot_vars3,"testigo")))){
#    x<-c(tot_vars3,"testigo")[i]
#    attr(BD_2020_input3_match_not_miss[[x]],"label")<-attr(BD_2020_input3[[x]],"label")
#}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

tab5<-
   svyCreateTableOne(vars = dplyr::select(matched_sample1,-all_of(c("sbj_num","a52","weight","group_num"))) %>% names(),
                     data= dsgn_matched_sample1, 
                     factorVars=dplyr::select(matched_sample1,-all_of(c("sbj_num","a52","weight"))) %>% names(),
                     strata = "a52", 
                     includeNA =T,
                      smd=T)

kableone(tab5, nonnormal= c("weight"),
                       test=T, varLabels=T,noSpaces=T, printToggle=T, dropEqual=F,smd=T) %>% 
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 13) %>%
    kableExtra::row_spec(1, bold = T) %>%
    kableExtra::add_footnote(paste0("Note. Total cases (n= ",nrow(BD_2020_input3),"); NA= Missing values;\nMedian and Interquartile Ranges (IQR) for continous variables; Percentages for categorical and ordinal variables;\n We did not matched by Sex, as it could be an effect modifier of the relationship with Distress"), notation = "none") %>% 
    kableExtra::scroll_box(width = "100%", height = "400px")
Table 6. Summary Tables of Variables of Interest by Sex
Men Women p-value test SMD
n 767.8 521.9
Head of Household (1=Yes) = Yes (%) 579.2 (75.4) 388.2 (74.4) 0.704 0.025
Unpaid Care Work at Home (A16) = Yes (%) 656.7 (85.5) 441.2 (84.5) 0.642 0.028
Had not Completed Secondary School (Art. Magdalena) = Yes (%) 155.7 (20.3) 106.4 (20.4) 0.969 0.002
Unskilled Workers (Plant and Machine Operators, Technicians, Transport Laborers & Unskilled Workers vs. Clerks, Sales Workers, Skilled Service Workers, Managers & Professionals) (Art. Magdalena) = Yes (%) 361.3 (47.1) 248.3 (47.6) 0.871 0.010
Absence of Union in Primary Work = Yes (%) 499.0 (65.0) 330.2 (63.3) 0.570 0.036
Size of the Workplace (Adimark) (%) 0.621 0.084
1 to 10 261.1 (34.0) 166.5 (31.9)
11 to 49 182.0 (23.7) 113.7 (21.8)
50 to 199 170.1 (22.2) 130.4 (25.0)
200 or more 154.5 (20.1) 111.4 (21.3)
Sector of Workplace (1=Private) = Public sector (%) 117.0 (15.2) 77.8 (14.9) 0.881 0.009
Customer service job = Job w/o customer service (%) 222.6 (29.0) 164.8 (31.6) 0.377 0.056
Supervises the tasks of other workers = Yes (%) 236.4 (30.8) 163.5 (31.3) 0.854 0.012
System of shifts = Yes (%) 117.1 (15.2) 83.9 (16.1) 0.710 0.023
Seniority in Workplace = Six months or less (%) 98.3 (12.8) 56.3 (10.8) 0.312 0.062
Fixed-term Contract (Art. Magdalena) = Yes (%) 221.3 (28.8) 136.0 (26.1) 0.323 0.062
GSE: Socioeconomic Groups (%) 0.803 0.081
ABC1 29.2 (3.8) 27.1 (5.2)
C2 247.6 (32.3) 164.5 (31.5)
C3 283.5 (36.9) 181.5 (34.8)
D 189.1 (24.6) 135.2 (25.9)
E 18.3 (2.4) 13.6 (2.6)
Economic Narrowness (%) 0.988 0.023
There is enough money, being able to save 130.5 (17.0) 92.0 (17.6)
There is just enough money, with no greater difficulties 410.0 (53.4) 273.4 (52.4)
There is not enough money, with greater difficulties 49.6 (6.5) 33.3 (6.4)
There is not enough money, with some difficulties 177.6 (23.1) 123.3 (23.6)
Age (in groups) (%) 0.794 0.082
20 to 29 162.5 (21.2) 99.6 (19.1)
30 to 39 187.5 (24.4) 117.8 (22.6)
40 to 49 179.3 (23.4) 129.9 (24.9)
50 to 59 165.2 (21.5) 118.3 (22.7)
>=60 73.2 (9.5) 56.4 (10.8)
Metropolitan Region (%) 0.004 0.164
Santiago 487.1 (63.4) 364.2 (69.8)
Valparaíso 130.5 (17.0) 86.0 (16.5)
Concepción 150.2 (19.6) 71.7 (13.7)
Risky drinking (Short AUDIT) = 1 (%) 259.8 (33.8) 86.2 (16.5) <0.001 0.407
NAQ (Mikkelsen & Einarsen) = 2 (%) 86.0 (11.2) 46.3 (8.9) 0.221 0.077
Note. Total cases (n= 1995); NA= Missing values;
Median and Interquartile Ranges (IQR) for continous variables; Percentages for categorical and ordinal variables;
We did not matched by Sex, as it could be an effect modifier of the relationship with Distress
#External Harassment in the last 12 months = Yes (%)    10.1 (13.2) 4.0 (5.2)   0.048       0.282

#"tipo_de_plan_ambulatorio",
#https://cran.r-project.org/web/packages/tableone/vignettes/smd.html
#http://rstudio-pubs-static.s3.amazonaws.com/405765_2ce448f9bde24148a5f94c535a34b70e.html
#https://cran.r-project.org/web/packages/tableone/vignettes/introduction.html
#https://cran.r-project.org/web/packages/tableone/tableone.pdf
#https://www.rdocumentation.org/packages/tableone/versions/0.12.0/topics/CreateTableOne

## Construct a table 
#standardized mean differences of greater than 0.1


Inference

Table = xtabs(weight  ~ auditc+ naq_my_e+ a52,
              data= matched_sample1)
   ###  Note that the grouping variable is last in the xtabs function
data.frame(expand.grid(rev(attr(ftable(Table), "row.vars"))), unclass(ftable(Table))) %>% 
  dplyr::arrange(auditc,naq_my_e) %>% 
  dplyr::mutate(naq_my_e=ifelse(naq_my_e==2,"Workplace Bullying","No Workplace Bullying")) %>% 
  dplyr::mutate(auditc=ifelse(auditc==1,"Yes","No")) %>% 
  dplyr::group_by(naq_my_e) %>% 
  dplyr::mutate(X1_perc= X1/(X1+X2)) %>% 
  dplyr::mutate(X2_perc= X2/(X1+X2)) %>% 
  dplyr::mutate(X1=round(X1,0)) %>% 
  dplyr::mutate(X2=round(X2,0)) %>% 
  dplyr::mutate(X1=paste0(X1,"\n(",scales::percent(X1_perc),")")) %>% 
  dplyr::mutate(X2=paste0(X2,"\n(",scales::percent(X2_perc),")")) %>% 
  dplyr::select(-X1_perc,-X2_perc)%>% 
  dplyr::rename("Men"="X1","Women"="X2") %>% 
  dplyr::select(naq_my_e,auditc,`Men`, `Women`) %>% 
  assign("tabla_ponderada",.,envir=.GlobalEnv)


Table2 = xtabs(n  ~ auditc+ naq_my_e+ a52,
              data= 
           matched_sample1 %>% 
           dplyr::group_by(a52,naq_my_e,auditc) %>% 
           dplyr::summarise(n=n()) %>% 
           dplyr::filter(!is.na(naq_my_e),!is.na(a52),!is.na(auditc)))
## `summarise()` has grouped output by 'a52', 'naq_my_e'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'a52', 'naq_my_e'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'a52', 'naq_my_e'. You can override using the `.groups` argument.
data.frame(expand.grid(rev(attr(ftable(Table2), "row.vars"))), unclass(ftable(Table2))) %>% 
  dplyr::arrange(auditc,naq_my_e) %>% 
  dplyr::mutate(naq_my_e=ifelse(naq_my_e==2,"Workplace Bullying","No Workplace Bullying")) %>% 
  dplyr::mutate(auditc=ifelse(auditc==1,"Yes","No")) %>% 
  dplyr::group_by(naq_my_e) %>% 
  dplyr::mutate(X1_perc= X1/(X1+X2)) %>% 
  dplyr::mutate(X2_perc= X2/(X1+X2)) %>% 
  dplyr::mutate(X1=round(X1,0)) %>% 
  dplyr::mutate(X2=round(X2,0)) %>% 
  dplyr::mutate(X1=paste0(X1,"\n(",scales::percent(X1_perc),")")) %>% 
  dplyr::mutate(X2=paste0(X2,"\n(",scales::percent(X2_perc),")")) %>% 
  dplyr::select(-X1_perc,-X2_perc) %>% 
  dplyr::rename("Men"="X1","Women"="X2") %>% 
  dplyr::select(naq_my_e,auditc,`Men`, `Women`) %>% 
  assign("tabla_no_ponderada",.,envir=.GlobalEnv)


tabla_no_ponderada %>% 
  dplyr::left_join(tabla_ponderada,by=c("naq_my_e","auditc")) %>% 
knitr::kable(format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
             caption="Table 7. Count of Participants by Sex and AUD",
             col.names=c("Workplace Bullying", "Risky drinking", rep(c("Men", "Women"),2)),
             align =rep('c', 101))  %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 13) %>%
  kableExtra::add_header_above(c("","","Not-weighted"=2,"Weighted"=2)) %>% 
  kableExtra::add_footnote(c("Note. Pecentages by row; Counts for the weighted table were approximated"), notation = "none") %>% 
  kableExtra::scroll_box(width = "100%", height = "350px")
Table 7. Count of Participants by Sex and AUD
Not-weighted
Weighted
Workplace Bullying Risky drinking Men Women Men Women
No Workplace Bullying No 363 (43%) 484 (57%) 440 (52%) 405 (48%)
Workplace Bullying No 55 (57.9%) 40 (42.1%) 68 (69%) 30 (31%)
No Workplace Bullying Yes 196 (69%) 90 (31%) 242 (77%) 70 (23%)
Workplace Bullying Yes 17 (50.0%) 17 (50.0%) 18 (53%) 16 (47%)
Note. Pecentages by row; Counts for the weighted table were approximated
#https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/mantelhaen.test
#Performs a Cochran-Mantel-Haenszel chi-squared test of the null that two nominal variables are conditionally independent in each stratum, assuming that there is no three-way interaction.

#Test for homogeneity on 2 x 2 x k tables over strata (i.e., whether the log odds ratios are the same in all strata).
vcd::woolf_test(Table)
#El test de wolf no es significativo, por lo que ORs comunes es al menos potencialmente rasonable para ambos estratos
DescTools::BreslowDayTest(x = Table, OR = 1)
#Hay homogeneidad, por lo que se puede utilizar M_H
# Me dan algo distinto

#The asymptotic distribution is only valid if there is no three-way interaction. In the classical 2 by 2 by K case, this is equivalent to the conditional odds ratios in each stratum being identical. Currently, no inference on homogeneity of the odds ratios is performed.
#The Breslow-Day test can be used to check the null hypothesis that all odds ratios are equal. The Cochran-Mantel-Haenszel test can be thought of as a special case of the Breslow-Day test wherein the common odds ratio is assumed to be 1 

mantelhaen.test(Table)
## Evidencia de la asociación entre sexo y ansiedad al ajustar por dirección del servicio
# Evidencia de que hay una asociación
# En cada nivel de la comparación (hosp y servicio), tamizaje ansiedad y sexo no son indpendientes.
# Hay una interacción de 3 vías

#apply(Table, 3, function(x) (x[1,1]*x[2,2])/(x[1,2]*x[2,1]))


#ote that in R with glm.fit, binomial and quasibinomial are exactly the same, except that quasibinomial (1) removes the integer check, and (2) returns an AIC of NA.
require("sandwich")
fit_svyglm<-svyglm(auditc~a52*naq_my_e, design=dsgn_matched_sample1, family="quasibinomial")

svyglm_not_matched<-svyglm(auditc~a52*factor(naq_my_e), design=dsgn_BD_not_miss, family="quasibinomial")

cat("#### ","Table 8. Regression coefficients","\n")
## ####  Table 8. Regression coefficients
library(finalfit)
# Binomial example
## Note model family needs specified and exponentiation if desired
dependent = "auditc"
explanatory = c("a52*naq_my_e")

# Multivariable fit
fit_multi = dsgn_matched_sample1 %>%
  svyglmmulti(dependent, explanatory, family = "quasibinomial") %>%
  fit2df(exp = TRUE, estimate_name = "OR", estimate_suffix = " Matched")

fit_multi_no_match = dsgn_BD_not_miss %>%
  svyglmmulti(dependent, explanatory, family = "quasibinomial") %>%
  fit2df(exp = TRUE, estimate_name = "OR", estimate_suffix = " Unmatched")

fit_metrics = dsgn_matched_sample1 %>%
  svyglmmulti(dependent, explanatory, family = "quasibinomial") %>%
  ff_metrics()

fit_metrics_no_match = dsgn_BD_not_miss %>%
  svyglmmulti(dependent, explanatory, family = "quasibinomial")  %>%
  ff_metrics()
#Concordance statistic: C-statistic = 0.627, more than .5, it is slightly better than predicting an outcome than outcome chance.
#Hosmer DW, Lemeshow S. Applied Logistic Regression (2nd Edition). New York, NY: John Wiley & Sons; 2000.

#Like most goodness of fit tests, these small p-values (usually under 5%) mean that your model is not a good fit.
#But large p-values don’t necessarily mean that your model is a good fit, just that there isn’t enough evidence to say it’s a poor fit. 
# H&L = Chi-sq(8) 0.78 (p=0.999)

# Pipe together
matched_sample1 %>%
  dplyr::mutate(
    a52 = ff_label(a52, "Sex (Women)")
  ) %>% 
  ff_interaction(a52, naq_my_e) %>%
  summary_factorlist(dependent, explanatory, fit_id = TRUE, p = T, total_col = T) %>%
  ff_merge(fit_multi) %>% 
  ff_merge(fit_multi_no_match) %>% 
  dependent_label(matched_sample1, dependent) %>% 
  dplyr::mutate(` `=ifelse(` `=="1","No WB",` `)) %>% 
  dplyr::mutate(` `=ifelse(` `=="2","Workplace Bullying",` `)) %>% 
  dplyr::mutate(` `=ifelse(is.na(` `),"",` `)) %>% 
  dplyr::mutate(`Dependent: Risky drinking (Short AUDIT)`=ifelse(fit_id=="a52Women:naq_my_e2","Sex (Women) x NAQ (Mikkelsen & Einarsen)",
                                                                 `Dependent: Risky drinking (Short AUDIT)`)) %>% 
  dplyr::select(-fit_id,-unit,-value,-p,-Total,-index) %>%
  dplyr::rename(`Value`=` `) %>% 
  knitr::kable(row.names=FALSE, align=c("l", "l", "c", "c", "c", "c")) %>% 
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 13) %>% 
  kableExtra::add_footnote(paste0("Note. Matched sample: ",fit_metrics,"\nUnmatched sample: ",fit_metrics_no_match), notation = "none") %>% 
  kableExtra::scroll_box(width = "100%", height = "250px")
Dependent: Risky drinking (Short AUDIT) Value OR Matched OR Unmatched
Sex (Women) Men
Women 0.32 (0.23-0.44, p<0.001) 0.39 (0.31-0.50, p<0.001)
NAQ (Mikkelsen & Einarsen) No WB
Workplace Bullying 0.48 (0.25-0.90, p=0.023) 0.71 (0.42-1.18, p=0.189)
Sex (Women) x NAQ (Mikkelsen & Einarsen) 6.31 (2.47-16.15, p<0.001) 2.07 (0.95-4.53, p=0.068)
Note. Matched sample: Number in dataframe = 1262, Number in model = 1262, Missing = 0, AIC = NA, C-statistic = 0.627, H&L = Chi-sq(8) 0.78 (p=0.999)
Unmatched sample: Number in dataframe = 1995, Number in model = 1995, Missing = 0, AIC = NA, C-statistic = 0.603, H&L = Chi-sq(8) 0.55 (p=1.000)
library(broom)
glance(fit_svyglm)
## Error in .svycheck(design): ..2 usado en un contexto incorreto, ningún ... para examinar
glance(svyglm_not_matched)
## Error in .svycheck(design): ..2 usado en un contexto incorreto, ningún ... para examinar
#summ(svyglm_not_matched)$model
#summ(svyglm_not_matched)$model$deviance

a<-svyglm(auditc~1, design=dsgn_matched_sample1,family=quasibinomial()) #Null model
anova(a,svyglm)
## Error: objeto de tipo 'closure' no es subconjunto
#  it's a test of whether the model is better than nothing.  It should agree with regTermTest(method="LRT")
 
 #Model fit statistics for a svyglm weighted regression model
poliscidata::fit.svyglm(fit_svyglm, digits = 3)
##               R-Squared      Adjusted R-Squared 
##                   0.045                   0.043
#R2 measures by themselves never measure goodness of fit; they measure mainly predictive discrimination. Goodness of fit only comes from comparing R2 with the R2 from a richer model

#Ignoring the complex sampling design may lead to notably biased parameter estimators.

paste0("Modelo con muestra pareada, pero ignorando disño complejo")
## [1] "Modelo con muestra pareada, pero ignorando disño complejo"
tidy(glm(auditc~a52*naq_my_e, data=matched_sample1, family="quasibinomial"))
## # A tibble: 4 x 5
##   term               estimate std.error statistic  p.value
##   <chr>                 <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)          -0.616    0.0888     -6.94 6.19e-12
## 2 a52Women             -1.07     0.145      -7.34 3.87e-13
## 3 naq_my_e2            -0.558    0.292      -1.91 5.61e- 2
## 4 a52Women:naq_my_e2    1.38     0.427       3.24 1.22e- 3
paste0("Modelo sin muestra pareada, pero ignorando disño complejo")
## [1] "Modelo sin muestra pareada, pero ignorando disño complejo"
tidy(glm(auditc~a52*naq_my_e, data=BD_2020_input3_match_not_miss %>%  dplyr::mutate(auditc=ifelse(auditc=="Yes",1,0)), family="quasibinomial"))
## # A tibble: 4 x 5
##   term               estimate std.error statistic  p.value
##   <chr>                 <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)          -0.662    0.0715     -9.26 4.99e-20
## 2 a52Women             -0.842    0.111      -7.56 6.22e-14
## 3 naq_my_e2            -0.356    0.240      -1.48 1.38e- 1
## 4 a52Women:naq_my_e2    0.474    0.357       1.33 1.85e- 1
#Here, the F-test does indicate that the interaction term in the model is significant, so the effects of education are not constant by race.
regTermTest(fit_svyglm, test.terms = ~a52:naq_my_e, method = "Wald", df=NULL)
## Wald test for a52:naq_my_e
##  in svyglm(formula = ..1, design = ..2, family = "quasibinomial")
## F =  14.78336  on  1  and  1258  df: p= 0.00012663
paste0("Si el test de Wald es significativo, los efectos de estar sujeto a violencia laboral varían según el sexo")
## [1] "Si el test de Wald es significativo, los efectos de estar sujeto a violencia laboral varían según el sexo"
car::vif(fit_svyglm)
##          a52     naq_my_e a52:naq_my_e 
##     1.140407     1.849357     2.030487
#No multicollinearity


jtools::plot_summs(svyglm_not_matched, fit_svyglm, 
                   model.names =c("Not matched","Matched sample"),
                    exp = T,
                  coefs = c("Sex(Women)"="a52",
          "Sex(Women)"="a52Women",
          "NAQ (Mikkelsen & Einarsen)"="naq_my_e2",
                         "NAQ (Mikkelsen & Einarsen)"="factor(naq_my_e)2",
                         "NAQ (Mikkelsen & Einarsen)"="naq_my_e2",
                         "Sex (Women) x NAQ (Mikkelsen & Einarsen)"="a52Women:naq_my_e2",
          "Sex (Women) x NAQ (Mikkelsen & Einarsen)"="a52Women:factor(naq_my_e)2"),
          colors=c("gray20","gray60"))+
  xlab("Odds Ratio")
Figure 6. Forest Plot Quasibinomial Models. Sex and Workplace Bullying on Screening of Risky drinking Coefficients

Figure 6. Forest Plot Quasibinomial Models. Sex and Workplace Bullying on Screening of Risky drinking Coefficients

interactions::cat_plot(fit_svyglm,pred="a52",modx="naq_my_e",y.label = "% of AUD",#itter = .6,plot.points = TRUE
                       modx.labels=c("Not WB", "WB"),x.label="Sex",legend.main="Workplace\nBullying (WB)",colors=c("gray20","gray60"))+scale_y_continuous(labels = scales::percent_format(accuracy = 2),limits=c(.0,1))
Figure 7. Probabillity of Showing Risky drinking by Sex & Workplace Bullying (Mikkelsen & Einarsen)

Figure 7. Probabillity of Showing Risky drinking by Sex & Workplace Bullying (Mikkelsen & Einarsen)

Bayesian

library(brms)
#load("C:/Users/andre/Google Drive/DOCUMENTOS/6.TESIS 2018/LCA NAQ/avance3_autonaq.RData")


# as a Bernoulli variable is a binomial variable with  n=1 .
Bayes_Model_Binary1a_nopri <- brm(formula = auditc | weights(weight) ~ a52*naq_my_e,  
                   data=matched_sample1, 
                   family = bernoulli(link = "logit"), #A vector of one or two character strings. The first string indicates the distribution of the dependent variable (the 'family'). Currently, the following families are supported: "gaussian", "student", "cauchy",
                   warmup = 2000, #A positive integer specifying number of warmup (aka burnin) iterations. This also specifies the number of iterations used for stepsize adaptation, so warmup samples should not be used for inference. The number of warmup should not be larger than n.
                   iter = 20000, #Number of total iterations per chain (including burnin; default: 2000)
                   chains = 7,
                   inits= "0", 
                   cores=7, #Number of Markov chains (default: 2)
                   seed = 2125)
library(emmeans)

Bayes_Model_Binary1b_pri_vi <- brm(formula = auditc | weights(weight) ~ a52*naq_my_e,  
                   data=matched_sample1, 
                   family = bernoulli(link = "logit"), #A vector of one or two character strings. The first string indicates the distribution of the dependent variable (the 'family'). Currently, the following families are supported: "gaussian", "student", "cauchy",
                   warmup = 2000, #A positive integer specifying number of warmup (aka burnin) iterations. This also specifies the number of iterations used for stepsize adaptation, so warmup samples should not be used for inference. The number of warmup should not be larger than n.
                   iter = 20000, #Number of total iterations per chain (including burnin; default: 2000)
                   chains = 7,
                   inits= "0", 
                   prior = prior(normal(0, 1), class = "b"), #A named list of character strings specifing the prior distributions of the parameters. Further information is provided under 'Details'.
                   cores=7, #Number of Markov chains (default: 2)
                   seed = 2125)

Bayes_Model_Binary1c_pri_int_vi <- brm(formula = auditc | weights(weight) ~ a52*naq_my_e,  
                   data=matched_sample1, 
                   prior = c(prior(normal(0, 1), class = Intercept),
                             prior(normal(0, 1), class = b)),
                   family = bernoulli(link = "logit"), #A vector of one or two character strings. The first string indicates the distribution of the dependent variable (the 'family'). Currently, the following families are supported: "gaussian", "student", "cauchy",
                   warmup = 2000, #A positive integer specifying number of warmup (aka burnin) iterations. This also specifies the number of iterations used for stepsize adaptation, so warmup samples should not be used for inference. The number of warmup should not be larger than n.
                   iter = 20000, #Number of total iterations per chain (including burnin; default: 2000)
                   chains = 7,
                   inits= "0", 
                   cores=7, #Number of Markov chains (default: 2)
                   seed = 2125)

Bayes_Model_Binary1c_pri_int0 <- brm(formula = auditc | weights(weight) ~ 1,  
                   data=matched_sample1, 
                   prior = c(prior(normal(0, 1), class = Intercept)
                             ),
                   family = bernoulli(link = "logit"), #A vector of one or two character strings. The first string indicates the distribution of the dependent variable (the 'family'). Currently, the following families are supported: "gaussian", "student", "cauchy",
                   warmup = 2000, #A positive integer specifying number of warmup (aka burnin) iterations. This also specifies the number of iterations used for stepsize adaptation, so warmup samples should not be used for inference. The number of warmup should not be larger than n.
                   iter = 20000, #Number of total iterations per chain (including burnin; default: 2000)
                   chains = 7,
                   inits= "0", 
                   cores=7, #Number of Markov chains (default: 2)
                   seed = 2125)

#Yes, that should work with brms as well, but you may need to specify reasonable priors on the regression coefficients so that the predictions are between 0 and 1 and thus resemble a probability.

Bayes_Model_Binary1d_pri_uncertain <- brm(formula = auditc | weights(weight) ~ a52*naq_my_e,  
                   data=matched_sample1, 
                  prior = c(set_prior ("normal(0,5)",class="b"),
                           set_prior ("normal(0,5)",class="Intercept")), #A named list of character strings specifing the prior distributions of the parameters. Further information is provided under 'Details'.
                   family = bernoulli(link = "logit"), #A vector of one or two character strings. The first string indicates the distribution of the dependent variable (the 'family'). Currently, the following families are supported: "gaussian", "student", "cauchy",
                   warmup = 2000, #A positive integer specifying number of warmup (aka burnin) iterations. This also specifies the number of iterations used for stepsize adaptation, so warmup samples should not be used for inference. The number of warmup should not be larger than n.
                   iter = 20000, #Number of total iterations per chain (including burnin; default: 2000)
                   chains = 7,
                   inits= "0", 
                   cores=7, #Number of Markov chains (default: 2)
                   seed = 2125)

rbind.data.frame(data.table::data.table(exp(fixef(Bayes_Model_Binary1a_nopri)[,-2]),keep.rownames = T),
      data.table::data.table(exp(fixef(Bayes_Model_Binary1b_pri_vi)[,-2]), keep.rownames = T),
      data.table::data.table(exp(fixef(Bayes_Model_Binary1c_pri_int_vi)[,-2]), keep.rownames = T),
      data.table::data.table(exp(fixef(Bayes_Model_Binary1d_pri_uncertain)[,-2]), keep.rownames = T)) %>% 
  dplyr::mutate(`CI95%` = paste0(sprintf("%1.2f",`Q2.5`),"-",sprintf("%1.2f",`Q97.5`))) %>% 
  dplyr::mutate(Estimate=sprintf(sprintf("%1.2f",Estimate))) %>% 
  dplyr::select(-Q2.5, -Q97.5) %>% 
  dplyr::filter(!grepl("Inter",rn))%>% 
  dplyr::mutate(rn=dplyr::case_when(rn=="a52Women"~"Sex(Women)",
                         rn=="naq_my_e2"~"NAQ (Mikkelsen & Einarsen)",
                         rn=="a52Women:naq_my_e2"~"Sex (Women) x NAQ (Mikkelsen & Einarsen)", T~rn)) %>% 
knitr::kable(format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
             caption="Table 8. Coefficients of Weighted Bayesian Models",
             col.names=c("Terms", "OR", "CI95%"),
             align =c("l",rep('c', 101)))  %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 13) %>%
  kableExtra::add_indent(1:12) %>% 
  kableExtra::group_rows("No priors",1,3) %>% 
  kableExtra::group_rows("Prior on the independent variables, assuming a mean of 0 & SD of 1",4,6) %>% 
  kableExtra::group_rows("Prior on the independent variables & intercept, assuming a mean of 0 & SD of 1",7,9) %>% 
  kableExtra::group_rows("Uncertain priors on the independent variables & intercept, assuming a mean of 0 & SD of 5",10,12) %>% 
  kableExtra::add_footnote(c("Note. Pecentages by row; Counts for the weighted table were approximated"), notation = "none") %>% 
  kableExtra::scroll_box(width = "100%", height = "350px")
Table 8. Coefficients of Weighted Bayesian Models
Terms OR CI95%
No priors
Sex(Women) 0.31 0.23-0.42
NAQ (Mikkelsen & Einarsen) 0.47 0.27-0.80
Sex (Women) x NAQ (Mikkelsen & Einarsen) 6.37 2.68-15.11
Prior on the independent variables, assuming a mean of 0 & SD of 1
Sex(Women) 0.33 0.25-0.44
NAQ (Mikkelsen & Einarsen) 0.56 0.33-0.90
Sex (Women) x NAQ (Mikkelsen & Einarsen) 4.46 2.04-9.67
Prior on the independent variables & intercept, assuming a mean of 0 & SD of 1
Sex(Women) 0.34 0.25-0.45
NAQ (Mikkelsen & Einarsen) 0.55 0.33-0.90
Sex (Women) x NAQ (Mikkelsen & Einarsen) 4.45 2.04-9.66
Uncertain priors on the independent variables & intercept, assuming a mean of 0 & SD of 5
Sex(Women) 0.31 0.23-0.42
NAQ (Mikkelsen & Einarsen) 0.47 0.27-0.80
Sex (Women) x NAQ (Mikkelsen & Einarsen) 6.27 2.67-14.75
Note. Pecentages by row; Counts for the weighted table were approximated
#weights: https://discourse.mc-stan.org/t/weights-in-brm/4278

We decided to keep the most conservative results, with, able to support more diffuse effect sizes.


#Gelman, Andrew, Aleks Jakulin, Maria Grazia Pittau, and Yu-Sung Su. 2008. “A Weakly Informative Default Prior Distribution for Logistic and Other Regression Models.” The Annals of Applied Statistics. JSTOR, 1360–83.

bayesplot::mcmc_trace(Bayes_Model_Binary1c_pri_int_vi, regex_pars = c("b_"), facet_args = list(nrow = 2))
Figure 8. Model convergence. Caterpillar plot for each term

Figure 8. Model convergence. Caterpillar plot for each term

#We note that our chains show convergence and are well-mixed, so we move on to taking a look at the estimates:
#The 7 chains mix well for all of the parameters and therefore, we can conclude no evidence of non-convergence.
#https://psyc-bayes-notes.netlify.app/generalized-linear-models.html

pp_check(Bayes_Model_Binary1c_pri_int_vi, type = "error_binned", nsamples = 9)
#where  ~y is based on simulated data from the posterior predictive distributions, and   y   is the observed data. 

#pp_check(Bayes_Model_Binary1b, type = "intervals")
#Posterior predictive check using the predicted and observed counts.
library(loo)
(loo1 <- loo(Bayes_Model_Binary1c_pri_int_vi, save_psis = TRUE))
(loo0 <- loo(Bayes_Model_Binary1c_pri_int_vi0 , save_psis = TRUE))

(loo2 <- loo(Bayes_Model_Binary1a_nopri, save_psis = TRUE))
(loo3 <- loo(Bayes_Model_Binary1b_pri_vi, save_psis = TRUE))
(loo4 <- loo(Bayes_Model_Binary1d_pri_uncertain, save_psis = TRUE))

#Results seem to be reliable according to Paretto estimates (<.5)
#Results seem to be reliable according to Paretto estimates (<.5)
loo_compare(loo1, loo2, loo3, loo4, loo0)
#http://www.stat.columbia.edu/~gelman/research/unpublished/loo_stan.pdf
#The difference will be positive if the expected predictive accuracy for the second model is higher.
#In this case, the difference was negative, so the so the post0 should
#estimated difference of expected leave-one-out prediction errors between the two models, along with the standard error:

model1 <- add_criterion(Bayes_Model_Binary1c_pri_int_vi, "waic")
model0 <- update(Bayes_Model_Binary1c_pri_int_vi, formula = auditc | weights(weight) ~ 1)
model0 <- add_criterion(model0,"waic")
loo_compare(model0,model1, criterion = "waic")

-23.5/7.4
#at least 3 standard error from 0

#First, instead of SE, it’s better to consider something like 2SE or more cautious 4SE, where 4 comes from the fact that SE for LOO can be underestimated for small n or under bad model misspecification. Second, the models can be very different and the predictions can be very different, it’s just that the average predictive accuracies are close to each other. Third, SE describe uncertainty, so if SE is large then it’s likely that the models do have big difference in predictive accuracy, but we don’t know whether the difference is negative or positive.

invisible("As seen here, the model des not predict well. However, we only used it for sensitivity analysis")
stanplot(Bayes_Model_Binary1c_pri_int_vi, 
         type = "areas",
         prob = 0.95, #95% intervals
        #pars = c("Intercept", "Birch5", "White_spruce5", "OD5","OS5","L5"),
         point_est = "median",
    area_method = "equal height",
         transformations = "exp") +
  scale_y_discrete(labels=c("Intecept","Women","NAQ (Mikkelsen\n& Einarsen)","Sex (Women)\nx\nNAQ (Mikkelsen &\nEinarsen)"))+
  geom_vline(xintercept = 1, color = "red", alpha = 0.6, lwd = .8, linetype = "dashed") +
  xlim(0,15)
Figure 9. Ridge term of Posterior distributions (Median in Bold, w/ 95% credible intervals) in Odds Ratio

Figure 9. Ridge term of Posterior distributions (Median in Bold, w/ 95% credible intervals) in Odds Ratio

#exp(fixef(Bayes_Model_Binary1c_pri_int_vi)[,-2])
#Despite outliers, 
library(RColorBrewer)

matched_sample1 %>%
  modelr::data_grid(naq_my_e, a52) %>%
  add_fitted_draws(Bayes_Model_Binary1c_pri_int_vi) %>%
  ggplot(aes(x = .value, y = interaction(naq_my_e, a52))) +
  stat_pointinterval(.width =  c(.66, .95), position = position_nudge(y = -0.3)) +
  scale_color_manual(values = brewer.pal(2, "Blues"))+
  coord_flip() +
  xlab("Predicted probability of Risky drinking") +
  ylab("Sex \nx\nNAQ (Mikkelsen & Einarsen)\n(2= Workplace Bullying)")+
  scale_x_continuous(labels = scales::percent_format(accuracy = 2),limits=c(.0,.7))+
  sjPlot::theme_sjplot2()
Figure 10. Probabillity of Showing Risky drinking by Sex & Workplace Bullying (Mikkelsen & Einarsen)

Figure 10. Probabillity of Showing Risky drinking by Sex & Workplace Bullying (Mikkelsen & Einarsen)


Sensitivity

df_tipr<-data.frame()
for (i in 1:100){
  x<-(1-i/100)
    skip_to_next <- FALSE
  
  # Note that print(b) fails since b doesn't exist
    tryCatch(
     df_tipr<-rbind(df_tipr,tipr::tip_with_binary(data.frame(exp(confint(fit_svyglm)))[4,],
                        unexposed_p= 0,
                        exposed_p = i/100,
                        verbose=F,
                        #outcome_association=4,
                        lb="X2.5..", ub="X97.5..")), 
           error = function(e) { skip_to_next <<- TRUE})
  
  if(skip_to_next) { next }     
    
}
rel_or_prev<-
df_tipr %>% ggplot(aes(x=exposed_p,outcome_association))+geom_point()+ sjPlot::theme_sjplot(base_size = 18)+ ylab("Odds Ratio")+ scale_x_continuous(name="Difference of exposure (in %)",labels = scales::percent)+ 
    geom_hline(yintercept=2, linetype='dotted', col = 'red',size=1)+ylim(c(0,50))
  
#1 if outcome is rare (<15 percent at end of follow-up); 0 if outcome is not rare
#(>15 percent at end of follow-up)
evalue_or<-
    EValue::evalues.OR(exp(fit_svyglm$coefficients[4]), exp(confint(fit_svyglm))[4,1], exp(confint(fit_svyglm))[4,2], rare=FALSE)

evalue_or2<-
    EValue::evalues.OR(exp(fixef(Bayes_Model_Binary1c_pri_int_vi)[,-2])[4,1], exp(fixef(Bayes_Model_Binary1c_pri_int_vi)[,-2])[4,2], exp(fixef(Bayes_Model_Binary1c_pri_int_vi)[,-2])[4,3], rare=FALSE)

We estimated the sensibility of the associations found assuming different values of the sensitivity parameter (\(\Gamma\)).


ggplotly(rel_or_prev)%>% layout(autosize = F, width = 600, height = 600)

Figure 11. Sensibility Parameters for the Interaction Found

xmax = 20
RR = evalue_or[1,1]

  x = seq( 0, xmax, 0.01 )
  
  # MM: reverse RR if it's preventive
  if ( RR < 1 ) RR = 1/RR
  
  plot( x, x, lty = 2, col = "white", type = "l", xaxs = "i", yaxs = "i", xaxt="n", yaxt = "n",
        xlab = bquote(.("Association of the Exposure and Confounder ") ~  OR[EU]), 
        ylab = bquote(.("Association of the Confounder and Outcome  ") ~ OR[UD]),
        xlim = c( 0,xmax ),
        main = "" )
  
  x = seq( RR, xmax, 0.01 )
  
  y    = RR*( RR-1 )/( x-RR )+RR
  
  lines( x, y, type = "l" )
  
  
  high = RR + sqrt( RR*( RR-1 ) )
  
  
  points( high, high, pch = 19 )
  
  label5 = seq( 5, 40, by = 5 )
  axis( 1, label5, label5, cex.axis = 1 )
  axis( 2, label5, label5, cex.axis = 1 )
  
  g = round( RR + sqrt( RR * ( RR - 1 ) ), 2 )
  label = paste( "( ", g, ", ", g, " )", sep="" )
  
  text( high + 3, high + 1, label )
  
  legend( "bottomleft", expression(
    OR[EU]*OR[UD]/( OR[EU]+OR[UD]-1 )==OR
  ), 
  lty = 1:2,
  bty = "n" )
Figure 11. Sensibility Parameters for the Interaction Found

Figure 11. Sensibility Parameters for the Interaction Found

With an observed odds ratio of 6.31, an unmeasured confounder that was associated with both the outcome and the exposure by a OR of 4.46-fold each, above and beyond the measured confounders, could explain away the estimate, but weaker confounding could not. For the Bayesian, the unmeasured confounder by a OR of 3.64-fold each.


Session Info

save.image("C:/Users/andre/Google Drive/DOCUMENTOS/6.TESIS 2018/LCA NAQ/avance3_autonaq.RData")
Sys.getenv("R_LIBS_USER")
## [1] "C:/Users/andre/Documents/R/win-library/4.0"
rstudioapi::getSourceEditorContext()
## Document Context: 
## - id:        '63E59C7D'
## - path:      'C:/Users/andre/Google Drive/DOCUMENTOS/6.TESIS 2018/LCA NAQ/Bullying_auditc.Rmd'
## - contents:  <3861 rows>
## Document Selection:
## - [745, 1] -- [745, 1]: ''
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Spanish_Chile.1252  LC_CTYPE=Spanish_Chile.1252   
## [3] LC_MONETARY=Spanish_Chile.1252 LC_NUMERIC=C                  
## [5] LC_TIME=Spanish_Chile.1252    
## 
## attached base packages:
## [1] parallel  grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] RColorBrewer_1.1-2     sandwich_3.0-0         Amelia_1.7.6          
##  [4] semTools_0.5-4         semPlot_1.1.2          lavaan_0.6-7          
##  [7] GPArotation_2014.11-1  gdtools_0.2.3          readr_1.4.0           
## [10] EValue_4.1.2           tipr_0.2.0             finalfit_1.0.2        
## [13] emmeans_1.5.4          loo_2.4.1              rstanarm_2.21.1       
## [16] ROCR_1.0-11            brms_2.15.0            Rcpp_1.0.6            
## [19] tidybayes_2.3.1        poliscidata_2.3.0      ggiraph_0.7.8         
## [22] interactions_1.1.3     jtools_2.1.2           cobalt_4.2.4          
## [25] sensitivityfull_1.5.6  sensitivity2x2xk_1.01  MatchIt_4.1.0         
## [28] exactRankTests_0.8-31  gridExtra_2.3          Hmisc_4.4-2           
## [31] Formula_1.2-4          foreign_0.8-80         glpkAPI_1.3.2         
## [34] designmatch_0.3.1      Rglpk_0.6-4            MASS_7.3-51.6         
## [37] lattice_0.20-41        gurobi_9.1-1           slam_0.1-48           
## [40] githubinstall_0.2.2    doParallel_1.0.16      iterators_1.0.13      
## [43] foreach_1.5.1          tableone_0.12.0        tidylog_1.0.2         
## [46] plotly_4.9.3           DiagrammeR_1.0.6.1     dplyr_1.0.5           
## [49] survey_4.0             survival_3.1-12        Matrix_1.2-18         
## [52] broom_0.7.4            knitr_1.31             tidyr_1.1.2           
## [55] igraph_1.2.6           ggparallel_0.2.0       ggplot2_3.3.3         
## [58] Weighted.Desc.Stat_1.0 plyr_1.8.6             reshape2_1.4.4        
## [61] Epi_2.43               remotes_2.2.0          data.table_1.14.0     
## [64] binda_1.0.3            entropy_1.2.1          car_3.0-10            
## [67] carData_3.0-4          psych_2.0.12           qdapTools_1.3.5       
## [70] visreg_2.7.0           here_1.0.1             googledrive_1.0.1     
## 
## loaded via a namespace (and not attached):
##   [1] TMB_1.7.20            clisymbols_1.2.0      mitools_2.4          
##   [4] pander_0.6.3          pbapply_1.4-3         haven_2.3.1          
##   [7] vctrs_0.3.6           V8_3.4.0              usethis_2.0.0        
##  [10] mgcv_1.8-31           later_1.1.0.1         nloptr_1.2.2.2       
##  [13] DBI_1.1.1             distributional_0.2.2  jpeg_0.1-8.1         
##  [16] sjmisc_2.8.6          pls_2.7-3             htmlwidgets_1.5.3    
##  [19] mvtnorm_1.1-1         kutils_1.70           inline_0.3.17        
##  [22] laeken_0.5.1          sROC_0.1-2            markdown_1.1         
##  [25] regsem_1.6.2          DEoptimR_1.0-8        KernSmooth_2.23-17   
##  [28] DT_0.17               promises_1.1.1        gdata_2.18.0         
##  [31] ggeffects_1.0.1       pkgload_1.1.0         RcppParallel_5.0.2   
##  [34] d3Network_0.5.2.1     fs_1.5.0              weights_1.0.1        
##  [37] mnormt_2.0.2          ranger_0.12.1         digest_0.6.27        
##  [40] png_0.1-7             pkgconfig_2.0.3       dygraphs_1.1.1.6     
##  [43] estimability_1.3      minqa_1.2.4           rstan_2.21.2         
##  [46] modeltools_0.2-23     nortest_1.0-4         xfun_0.22            
##  [49] zoo_1.8-8             tidyselect_1.1.0      performance_0.7.0    
##  [52] purrr_0.3.4           kernlab_0.9-29        labelled_2.7.0       
##  [55] pcaPP_1.9-73          viridisLite_0.4.0     gamm4_0.2-6          
##  [58] pkgbuild_1.2.0        rlang_0.4.10          glue_1.4.2           
##  [61] modelr_0.1.8          fpc_2.2-9             matrixStats_0.58.0   
##  [64] sgeostat_1.0-27       stringr_1.4.0         MVN_5.8              
##  [67] rockchalk_1.8.144     bayestestR_0.8.2      kableExtra_1.3.1     
##  [70] threejs_0.3.3         labeling_0.4.2        GGally_2.1.0         
##  [73] httpuv_1.5.5          class_7.3-17          BDgraph_2.63         
##  [76] qgraph_1.6.5          corpcor_1.6.9         shinystan_2.5.0      
##  [79] webshot_0.5.2         jsonlite_1.7.2        tmvnsim_1.0-2        
##  [82] mime_0.10             systemfonts_1.0.0     gplots_3.1.1         
##  [85] zCompositions_1.3.4   stringi_1.5.3         insight_0.12.0       
##  [88] processx_3.4.5        sem_3.1-11            ggdist_2.4.0         
##  [91] bitops_1.0-6          cli_2.3.1             rrcov_1.5-5          
##  [94] broom.mixed_0.2.6     rsconnect_0.8.16      mvoutlier_2.0.9      
##  [97] energy_1.7-7          rstudioapi_0.13       nlme_3.1-148         
## [100] janitor_2.1.0         ks_1.11.7             miniUI_0.1.1.1       
## [103] prabclus_2.3-2        NADA_1.6-1.1          sessioninfo_1.1.1    
## [106] readxl_1.3.1          lifecycle_1.0.0       munsell_0.5.0        
## [109] cellranger_1.1.0      robCompositions_2.3.0 moments_0.14         
## [112] visNetwork_2.0.9      caTools_1.18.1        codetools_0.2-16     
## [115] coda_0.19-4           mi_1.0                lmtest_0.9-38        
## [118] bayesplot_1.8.0       htmlTable_2.1.0       rstantools_2.1.1     
## [121] xtable_1.8-4          diptest_0.75-7        StanHeaders_2.21.0-7 
## [124] OpenMx_2.18.1         abind_1.4-5           farver_2.0.3         
## [127] askpass_1.1           svUnit_1.0.6          sjstats_0.18.1       
## [130] shinythemes_1.2.0     tibble_3.0.6          cluster_2.1.0        
## [133] fda_5.1.9             ellipsis_0.3.1        prettyunits_1.1.1    
## [136] lubridate_1.7.9.2     ggridges_0.5.3        mclust_5.4.7         
## [139] ggstance_0.3.5        sjlabelled_1.1.7      shinyjs_2.0.0        
## [142] gargle_0.5.0          etm_1.1.1             VIM_6.1.0            
## [145] parameters_0.11.0     testthat_3.0.1        htmltools_0.5.1.1    
## [148] yaml_2.2.1            utf8_1.2.1            XML_3.99-0.5         
## [151] fds_1.8               e1071_1.7-4           hdrcde_3.4           
## [154] withr_2.4.1           BiasedUrn_1.07        effectsize_0.4.3     
## [157] robustbase_0.93-7     devtools_2.3.2        memoise_2.0.0        
## [160] evaluate_0.14         forcats_0.5.1         rio_0.5.16           
## [163] callr_3.5.1           ps_1.5.0              curl_4.3             
## [166] metafor_2.4-0         fansi_0.4.2           fdrtool_1.2.16       
## [169] highr_0.8             xts_0.12.1            checkmate_2.0.0      
## [172] cachem_1.0.3          desc_1.2.0            cmprsk_2.2-10        
## [175] truncnorm_1.0-8       polycor_0.7-10        rjson_0.2.20         
## [178] openxlsx_4.2.3        rprojroot_2.0.2       tools_4.0.2          
## [181] rainbow_3.6           magrittr_2.0.1        Rsolnp_1.16          
## [184] RCurl_1.98-1.2        mice_3.13.0           pbivnorm_0.6.0       
## [187] xml2_1.3.2            httr_1.4.2            assertthat_0.2.1     
## [190] rmarkdown_2.6         boot_1.3-25           R6_2.5.0             
## [193] nnet_7.3-14           Brobdingnag_1.2-6     gtools_3.8.2         
## [196] statmod_1.4.35        sjPlot_2.8.7          lisrelToR_0.1.4      
## [199] splines_4.0.2         snakecase_0.11.0      projpred_2.0.2       
## [202] colorspace_2.0-0      generics_0.1.0        stats4_4.0.2         
## [205] base64enc_0.1-3       chron_2.3-56          pillar_1.4.7         
## [208] sp_1.4-5              uuid_0.1-4            gtable_0.3.0         
## [211] rvest_0.3.6           zip_2.1.1             colourpicker_1.1.0   
## [214] latticeExtra_0.6-29   fastmap_1.1.0         crosstalk_1.1.1      
## [217] flexmix_2.3-17        vcd_1.4-8             arrayhelpers_1.1-0   
## [220] descr_1.1.5           openssl_1.4.3         scales_1.1.1         
## [223] arm_1.11-2            huge_1.3.4.1          backports_1.2.1      
## [226] plotrix_3.8-1         lme4_1.1-26           hms_1.0.0            
## [229] shiny_1.6.0           numDeriv_2016.8-1.1   glasso_1.11          
## [232] lazyeval_0.2.2        cvTools_0.3.2         whisker_0.4          
## [235] crayon_1.4.1          pROC_1.17.0.1         matrixcalc_1.0-3     
## [238] reshape_0.8.8         MetaUtility_2.1.1     bridgesampling_1.1-2 
## [241] rpart_4.1-15          compiler_4.0.2