1. Base de Dados e Pré-processamento

Para este estudo utilizamos apenas o dataset referente ao ano de 2020 coletado no openDataSus Porta openDataSus.

Neste pré-processamento criamos a variável desfecho binarizada (CIRCOBITO_SUI) que informa a circunstância onde se deu a morte não natural.

#importando a base
dados <- data.table::fread("data/Mortalidade_Geral_2020.csv") %>%  
  tibble::as_tibble() #transformando em um tibble
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
#Pré-processamento das variáveis
source("munge/1-Selecionando.R") #selecionando variáveis de interesse
source("munge/2-Filtrando-Criando.R") #Filtrando e criando variáveis de interesse
source("munge/3-Renomeando.R") #renomeando niveis das variáveis

2. Análise Exploratória dos Dados

2.1 Tabela de contingência vs Variável binária suicídio

#criando a tabela de contingência
dados %>% 
  select( #aqui ta selecionando as variáveis na tabela
    IDADE,
    SEXO,
    RACACOR,
    ESTCIV,
    ESC2010,
    CIRCOBITO_SUI) %>% 
  tbl_summary(by = CIRCOBITO_SUI,
              statistic = list(all_continuous() ~ "{mean} ({sd})")) %>% 
  add_p() #adiciona os p-valores dos testes estatísticos
Characteristic Outras causas, N = 129,9931 Suicídio, N = 13,1431 p-value2
IDADE 43 (22) 43 (18) <0.001
SEXO <0.001
Masculino 106,139 (82%) 10,321 (79%)
Feminino 23,844 (18%) 2,822 (21%)
Unknown 10 0
RACACOR <0.001
Branca 44,694 (35%) 6,322 (49%)
Preta 9,153 (7.2%) 719 (5.5%)
Amarela 516 (0.4%) 47 (0.4%)
Parda 72,983 (57%) 5,805 (45%)
Indígena 477 (0.4%) 115 (0.9%)
Unknown 2,170 135
ESTCIV <0.001
Solteiro 72,319 (58%) 6,766 (53%)
Casado 24,369 (19%) 3,232 (25%)
Viúvo 9,428 (7.5%) 419 (3.3%)
Separado/divorciado 6,074 (4.8%) 978 (7.6%)
União estável 6,452 (5.1%) 676 (5.3%)
Ignorado 6,887 (5.5%) 768 (6.0%)
Unknown 4,464 304
ESC2010 <0.001
Sem escolaridade 8,276 (6.8%) 527 (4.2%)
Fundamental I 29,308 (24%) 2,443 (20%)
Fundamental II 37,116 (30%) 3,035 (24%)
Ensino Médio 24,040 (20%) 2,946 (24%)
Superior incompleto 1,757 (1.4%) 428 (3.4%)
Superior completo 3,866 (3.2%) 911 (7.3%)
Ignorado 17,460 (14%) 2,152 (17%)
Unknown 8,170 701

1 Mean (SD); n (%)

2 Wilcoxon rank sum test; Pearson's Chi-squared test

2.2 Densidade em relação ao desfecho

Sexo

dados %>% 
  ggplot(aes(IDADE, fill=CIRCOBITO_SUI)) + geom_density(alpha=0.5) + facet_wrap(~SEXO) 

NA

Raca Cor

dados %>% 
  ggplot(aes(IDADE, fill=CIRCOBITO_SUI)) + geom_density(alpha=0.5) + facet_wrap(~RACACOR)

Estado Civil

dados %>% 
  ggplot(aes(IDADE, fill=CIRCOBITO_SUI)) + geom_density(alpha=0.5) + facet_wrap(~ESTCIV)

Escolaridade

dados %>% 
  ggplot(aes(IDADE, fill=CIRCOBITO_SUI)) + geom_density(alpha=0.5) + facet_wrap(~ESC2010)

3. Algoritmos de Machine Learning

3.1 Configurações do treinamento

#selecionando variáveis de interesse
dados <- dados %>% 
  select(SEXO, IDADE, ESC2010, RACACOR, CIRCOBITO_SUI) %>% 
  drop_na()

# Separação dos dados de treinamento e de teste com o pacote caret
split <- createDataPartition(y = dados$CIRCOBITO_SUI, p = 0.7, list = FALSE)
treinamento <- dados[split,]
teste <- dados[-split,]

#aplicando o método de cross validation com 10 fold e 10 repetições
fitControl <- trainControl(## 10-fold CV
  method = "repeatedcv",
  number = 10,
  repeats = 5) ## repeated ten times
#métrica para comparacao entre classificadores
metric <- "Accuracy"

3.2 Treinando os Algoritmos de Classificação

Regressão Logística

confusionMatrix(predicao, teste$CIRCOBITO_SUI, positive = "Suicídio")
Confusion Matrix and Statistics

               Reference
Prediction      Outras causas Suicídio
  Outras causas         36114     3705
  Suicídio                  0        0
                                          
               Accuracy : 0.907           
                 95% CI : (0.9041, 0.9098)
    No Information Rate : 0.907           
    P-Value [Acc > NIR] : 0.5044          
                                          
                  Kappa : 0               
                                          
 Mcnemar's Test P-Value : <2e-16          
                                          
            Sensitivity : 0.00000         
            Specificity : 1.00000         
         Pos Pred Value :     NaN         
         Neg Pred Value : 0.90695         
             Prevalence : 0.09305         
         Detection Rate : 0.00000         
   Detection Prevalence : 0.00000         
      Balanced Accuracy : 0.50000         
                                          
       'Positive' Class : Suicídio        
                                          

Análise Descriminante

confusionMatrix(predicao, teste$CIRCOBITO_SUI, positive = "Suicídio")
Confusion Matrix and Statistics

               Reference
Prediction      Outras causas Suicídio
  Outras causas         36111     3705
  Suicídio                  3        0
                                         
               Accuracy : 0.9069         
                 95% CI : (0.904, 0.9097)
    No Information Rate : 0.907          
    P-Value [Acc > NIR] : 0.525          
                                         
                  Kappa : -2e-04         
                                         
 Mcnemar's Test P-Value : <2e-16         
                                         
            Sensitivity : 0.000e+00      
            Specificity : 9.999e-01      
         Pos Pred Value : 0.000e+00      
         Neg Pred Value : 9.069e-01      
             Prevalence : 9.305e-02      
         Detection Rate : 0.000e+00      
   Detection Prevalence : 7.534e-05      
      Balanced Accuracy : 5.000e-01      
                                         
       'Positive' Class : Suicídio       
                                         

3.3 Comparação entre os algoritmos

# Obtendo o Resultado
results <- resamples(list(LG=modelfit.glm, LDA=modelfit.lda))
summary(results)

Call:
summary.resamples(object = results)

Models: LG, LDA 
Number of resamples: 50 

Accuracy 
         Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
LG  0.9069492 0.9070468 0.9070568 0.9070363 0.9070568 0.9070568    0
LDA 0.9067341 0.9069492 0.9069492 0.9069502 0.9069592 0.9070568    0

Kappa 
             Min.       1st Qu.        Median          Mean 3rd Qu.        Max. NA's
LG   0.0000000000  0.0000000000  0.0000000000  0.000000e+00       0 0.000000000    0
LDA -0.0006436195 -0.0002149438 -0.0002149438 -5.904422e-05       0 0.001880977    0
dotplot(results)

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CiNhcXVpIGZpY2EgdHVkbyBxdWUgZGV2ZSBzZXIgaW5pY2lhbGl6YWRvIGF1dG9tYXRpY2FtZW50ZSBubyBpbsOtY2lvIGRvIHByb2pldG8KbGlicmFyeSh0aWR5dmVyc2UpICNtYW5pcHVsYXIgZGFkb3MKbGlicmFyeShndHN1bW1hcnkpICNnZXJhciB0YWJlbGFzIGRlIGNvbnRpbmdlbmNpYSBjb20gcC12YWxvcgpsaWJyYXJ5KGZvcm1hdHRhYmxlKSAjZ2VyYXIgdGFiZWxhcwpsaWJyYXJ5KERUKSAjZ2VyYXIgdGFiZWxhcwpsaWJyYXJ5KHJtYXJrZG93bikgI2VzdGlsbyBkbyByZWxhdMOzcmlvCmxpYnJhcnkoZGF0YS50YWJsZSkgI2NhcnJlZ2FyIGdyYW5kZXMgYmFzZXMgZGUgZGFkb3MKbGlicmFyeShzalBsb3QpICNwbG90YSBjb2VmaWNpZW50ZXMgZGUgbW9kZWxvcyBkZSByZWdyZXNzw6NvCmxpYnJhcnkoY2FyZXQpICAjcGFjb3RlIGRlIGFsZ29yaXRtb3MgZGUgY2xhc3NpZmljYWNhbwoKa25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQoKYGBgCgojIDEuIEJhc2UgZGUgRGFkb3MgZSBQcsOpLXByb2Nlc3NhbWVudG8KClBhcmEgZXN0ZSBlc3R1ZG8gdXRpbGl6YW1vcyBhcGVuYXMgbyBkYXRhc2V0IHJlZmVyZW50ZSBhbyBhbm8gZGUgMjAyMCBjb2xldGFkbyBubyBvcGVuRGF0YVN1cyBbUG9ydGEgb3BlbkRhdGFTdXNdKGh0dHBzOi8vb3BlbmRhdGFzdXMuc2F1ZGUuZ292LmJyKS4KCk5lc3RlIHByw6ktcHJvY2Vzc2FtZW50byBjcmlhbW9zIGEgdmFyacOhdmVsIGRlc2ZlY2hvIGJpbmFyaXphZGEgKENJUkNPQklUT19TVUkpIHF1ZSBpbmZvcm1hIGEgY2lyY3Vuc3TDom5jaWEgb25kZSBzZSBkZXUgYSBtb3J0ZSBuw6NvIG5hdHVyYWwuCgpgYGB7ciBpbXBvcnR9CiNpbXBvcnRhbmRvIGEgYmFzZQpkYWRvcyA8LSBkYXRhLnRhYmxlOjpmcmVhZCgiZGF0YS9Nb3J0YWxpZGFkZV9HZXJhbF8yMDIwLmNzdiIpICU+JSAgCiAgdGliYmxlOjphc190aWJibGUoKSAjdHJhbnNmb3JtYW5kbyBlbSB1bSB0aWJibGUKCiNQcsOpLXByb2Nlc3NhbWVudG8gZGFzIHZhcmnDoXZlaXMKc291cmNlKCJtdW5nZS8xLVNlbGVjaW9uYW5kby5SIikgI3NlbGVjaW9uYW5kbyB2YXJpw6F2ZWlzIGRlIGludGVyZXNzZQpzb3VyY2UoIm11bmdlLzItRmlsdHJhbmRvLUNyaWFuZG8uUiIpICNGaWx0cmFuZG8gZSBjcmlhbmRvIHZhcmnDoXZlaXMgZGUgaW50ZXJlc3NlCnNvdXJjZSgibXVuZ2UvMy1SZW5vbWVhbmRvLlIiKSAjcmVub21lYW5kbyBuaXZlaXMgZGFzIHZhcmnDoXZlaXMKYGBgCgoKCgojIDIuIEFuw6FsaXNlIEV4cGxvcmF0w7NyaWEgZG9zIERhZG9zCgojIyAyLjEgVGFiZWxhIGRlIGNvbnRpbmfDqm5jaWEgdnMgVmFyacOhdmVsIGJpbsOhcmlhIHN1aWPDrWRpbwpgYGB7ciwgbGF5b3V0PSJsLWJvZHktb3V0c2V0In0KI2NyaWFuZG8gYSB0YWJlbGEgZGUgY29udGluZ8OqbmNpYQpkYWRvcyAlPiUgCiAgc2VsZWN0KCAjYXF1aSB0YSBzZWxlY2lvbmFuZG8gYXMgdmFyacOhdmVpcyBuYSB0YWJlbGEKICAgIElEQURFLAogICAgU0VYTywKICAgIFJBQ0FDT1IsCiAgICBFU1RDSVYsCiAgICBFU0MyMDEwLAogICAgQ0lSQ09CSVRPX1NVSSkgJT4lIAogIHRibF9zdW1tYXJ5KGJ5ID0gQ0lSQ09CSVRPX1NVSSwKICAgICAgICAgICAgICBzdGF0aXN0aWMgPSBsaXN0KGFsbF9jb250aW51b3VzKCkgfiAie21lYW59ICh7c2R9KSIpKSAlPiUgCiAgYWRkX3AoKSAjYWRpY2lvbmEgb3MgcC12YWxvcmVzIGRvcyB0ZXN0ZXMgZXN0YXTDrXN0aWNvcwpgYGAKCgojIyAyLjIgRGVuc2lkYWRlIGVtIHJlbGHDp8OjbyBhbyBkZXNmZWNobyB7LnRhYnNldH0KCiMjIyBTZXhvCmBgYHtyIGRlbnNpZGFkZV9zZXhvLCBlY2hvPVRSVUUsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmRhZG9zICU+JSAKICBnZ3Bsb3QoYWVzKElEQURFLCBmaWxsPUNJUkNPQklUT19TVUkpKSArIGdlb21fZGVuc2l0eShhbHBoYT0wLjUpICsgZmFjZXRfd3JhcCh+U0VYTykgCiAgCmBgYAoKIyMjIFJhY2EgQ29yCmBgYHtyIGRlbnNpZGFkZV9yYWNhLCBlY2hvPVRSVUUsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmRhZG9zICU+JSAKICBnZ3Bsb3QoYWVzKElEQURFLCBmaWxsPUNJUkNPQklUT19TVUkpKSArIGdlb21fZGVuc2l0eShhbHBoYT0wLjUpICsgZmFjZXRfd3JhcCh+UkFDQUNPUikKYGBgCgojIyMgRXN0YWRvIENpdmlsCmBgYHtyIGRlbnNpZGFkZV9jaXZpbCwgZWNobz1UUlVFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpkYWRvcyAlPiUgCiAgZ2dwbG90KGFlcyhJREFERSwgZmlsbD1DSVJDT0JJVE9fU1VJKSkgKyBnZW9tX2RlbnNpdHkoYWxwaGE9MC41KSArIGZhY2V0X3dyYXAofkVTVENJVikKYGBgCgojIyMgRXNjb2xhcmlkYWRlCmBgYHtyIGRlbnNpZGFkZV9lc2NvbGFyaWRhZGUsIGVjaG89VFJVRSwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KZGFkb3MgJT4lIAogIGdncGxvdChhZXMoSURBREUsIGZpbGw9Q0lSQ09CSVRPX1NVSSkpICsgZ2VvbV9kZW5zaXR5KGFscGhhPTAuNSkgKyBmYWNldF93cmFwKH5FU0MyMDEwKQpgYGAKCgoKCiMgMy4gQWxnb3JpdG1vcyBkZSBNYWNoaW5lIExlYXJuaW5nCiMjIDMuMSBDb25maWd1cmHDp8O1ZXMgZG8gdHJlaW5hbWVudG8KCmBgYHtyIG1vZGVsb30KI3NlbGVjaW9uYW5kbyB2YXJpw6F2ZWlzIGRlIGludGVyZXNzZQpkYWRvcyA8LSBkYWRvcyAlPiUgCiAgc2VsZWN0KFNFWE8sIElEQURFLCBFU0MyMDEwLCBSQUNBQ09SLCBDSVJDT0JJVE9fU1VJKSAlPiUgCiAgZHJvcF9uYSgpCgojIFNlcGFyYcOnw6NvIGRvcyBkYWRvcyBkZSB0cmVpbmFtZW50byBlIGRlIHRlc3RlIGNvbSBvIHBhY290ZSBjYXJldApzcGxpdCA8LSBjcmVhdGVEYXRhUGFydGl0aW9uKHkgPSBkYWRvcyRDSVJDT0JJVE9fU1VJLCBwID0gMC43LCBsaXN0ID0gRkFMU0UpCnRyZWluYW1lbnRvIDwtIGRhZG9zW3NwbGl0LF0KdGVzdGUgPC0gZGFkb3NbLXNwbGl0LF0KCiNhcGxpY2FuZG8gbyBtw6l0b2RvIGRlIGNyb3NzIHZhbGlkYXRpb24gY29tIDEwIGZvbGQgZSAxMCByZXBldGnDp8O1ZXMKZml0Q29udHJvbCA8LSB0cmFpbkNvbnRyb2woIyMgMTAtZm9sZCBDVgogIG1ldGhvZCA9ICJyZXBlYXRlZGN2IiwKICBudW1iZXIgPSAxMCwKICByZXBlYXRzID0gNSkgIyMgcmVwZWF0ZWQgdGVuIHRpbWVzCiNtw6l0cmljYSBwYXJhIGNvbXBhcmFjYW8gZW50cmUgY2xhc3NpZmljYWRvcmVzCm1ldHJpYyA8LSAiQWNjdXJhY3kiCgpgYGAKCiMjIDMuMiBUcmVpbmFuZG8gb3MgQWxnb3JpdG1vcyBkZSBDbGFzc2lmaWNhw6fDo28gey50YWJzZXR9CgojIyMgUmVncmVzc8OjbyBMb2fDrXN0aWNhCmBgYHtyIExHLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQojIExvZ2lzdGljIFJlZ3Jlc3Npb24gKExHKQpzZXQuc2VlZCg3KQptb2RlbGZpdC5nbG0gPC0gdHJhaW4oQ0lSQ09CSVRPX1NVSX4uLCBkYXRhPXRyZWluYW1lbnRvLCBtZXRob2Q9ImdsbSIsIG1ldHJpYz1tZXRyaWMsCiAgICAgICAgICAgICAgICAgICAgICB0ckNvbnRyb2w9Zml0Q29udHJvbCwgbmEuYWN0aW9uPW5hLm9taXQpCgojcHJlZGnDp8OjbyBlIG1hdHJpeiBkZSBjb25mdXPDo28KcHJlZGljYW8gPC0gcHJlZGljdChtb2RlbGZpdC5nbG0sIG5ld2RhdGEgPSB0ZXN0ZSkKY29uZnVzaW9uTWF0cml4KHByZWRpY2FvLCB0ZXN0ZSRDSVJDT0JJVE9fU1VJLCBwb3NpdGl2ZSA9ICJTdWljw61kaW8iKQpgYGAKCiMjIyBBbsOhbGlzZSBEZXNjcmltaW5hbnRlCmBgYHtyIExEQSwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KIyBMaW5lYXIgRGlzY3JpbWluYXRlIEFuYWx5c2lzIChMREEpCnNldC5zZWVkKDcpCm1vZGVsZml0LmxkYSA8LSB0cmFpbihDSVJDT0JJVE9fU1VJfi4sIGRhdGE9dHJlaW5hbWVudG8sIG1ldGhvZD0ibGRhIiwgbWV0cmljPW1ldHJpYywgCiAgICAgICAgICAgICAgICB0ckNvbnRyb2w9Zml0Q29udHJvbCwgbmEuYWN0aW9uPW5hLm9taXQpCgojcHJlZGnDp8OjbyBlIG1hdHJpeiBkZSBjb25mdXPDo28KcHJlZGljYW8gPC0gcHJlZGljdChtb2RlbGZpdC5sZGEsIG5ld2RhdGEgPSB0ZXN0ZSkKY29uZnVzaW9uTWF0cml4KHByZWRpY2FvLCB0ZXN0ZSRDSVJDT0JJVE9fU1VJLCBwb3NpdGl2ZSA9ICJTdWljw61kaW8iKQpgYGAKCgojIyAzLjMgQ29tcGFyYcOnw6NvIGVudHJlIG9zIGFsZ29yaXRtb3MKCmBgYHtyIGNvbXBhcmFjYW9BbGdvcml0bW9zfQojIE9idGVuZG8gbyBSZXN1bHRhZG8KcmVzdWx0cyA8LSByZXNhbXBsZXMobGlzdChMRz1tb2RlbGZpdC5nbG0sIExEQT1tb2RlbGZpdC5sZGEpKQpzdW1tYXJ5KHJlc3VsdHMpCmRvdHBsb3QocmVzdWx0cykKCmBgYAoKCg==