Carregando bibliotecas

# "limpar" ambiente
rm(list = ls())

# Carregar bibliotecas
library(readxl)
library(readr) 
library(mfx)
library(caret)
library(pROC)
library(ResourceSelection)
library(modEvA)
library(foreign)
library(stargazer) #Hlavac, Marek (2022) 
library(ggplot2)
library(glm2)
library(PerformanceAnalytics)
library(nnet) #logit multinomial
library(car)
library(reshape2)
library(lmtest)
library(MASS)
library(pROC)
library(dplyr)
library(margins)
library(broom)

Exercício de Robustez

Foi retirado a categoria Active da base de informações afim de testar se os resultados do modelo permanecem de maneira similar aos dados originais, afim de atestar o poder preditivo do modelo estimado.

m2 <- read_excel("C:/Users/kassy/OneDrive - caen.ufc.br/1. CAEN/3. Doutorado/Thesis/Test banco mundial/corrigidaBDsemactive.xlsx")
#with(m2, table(m2$catstatus, m2$`Project status`))
# Crie uma tabela cruzada com as contagens
tabela_contagem2 <- table(m2$`Project status`)

# Crie um data frame com a contagem e os percentuais
tabela_completa2 <- data.frame(Contagem = tabela_contagem2, Percentual = prop.table(tabela_contagem2) * 100)

# Exiba a tabela
print(tabela_completa2)
##   Contagem.Var1 Contagem.Freq Percentual.Var1 Percentual.Freq
## 1     Cancelled           176       Cancelled        51.16279
## 2     Concluded           107       Concluded        31.10465
## 3    Distressed            61      Distressed        17.73256

Definição das variáveis m2

Categorização dos tipos de contratos para escrever as equações como:

\(ln(\frac{P(status=Cancelled)}{P(status=Concluded)})\), \(ln(\frac{P(status=Distressed)}{P(status=Concluded)})\)

m2$`Project status` <- as.factor(m2$`Project status`)

m2$`Project status` <- relevel(m2$`Project status`, ref = "Concluded")
is.factor(m2$`Project status`)
## [1] TRUE
ITm2=m2$TotalInvestment1
percentprivm2=m2$PercentPrivate
tm2=m2$ContractPeriod
km2=m2$PhysicalAssets
debtm2=m2$TotalDebtFunding
#catstatusm2=m2$catstatus
statusm2=m2$`Project status`

variáveis categóricas m2

catpppm2=m2$catppp
#catsubtypem2=m2$catsubtype
#catsetorm2=m2$catsetor
#catsetor2m2=m2$catsetor2
#catsetorm2=m2$catsetor3
catbidm2 = m2$catbid
award = m2$AwardMethod
award = factor(award)
award <- relevel(award, ref = "Not Available")
m2$sectorR <- as.factor(m2$sectorR)
m2$sectorR <- relevel(m2$sectorR, ref = "Enrg")
is.factor(m2$sectorR)
## [1] TRUE
# numericamente
sector = m2$sectorR
catpppm2 = factor(catpppm2)
#catsetorm2 = factor(catsetorm2)
#catsetor2m2 = factor(catsetor2m2)
#catsetorm2 = factor(catsetorm2)
#catsubtypem2 = factor(catsubtypem2) 
catbidm2 = factor(catbidm2)
#catstatusm2 = factor(catstatusm2)
#status=m2$`Project status`
logm20 = multinom(statusm2 ~ ITm2 + km2 + tm2 + percentprivm2  + catpppm2 + sector + debtm2 + catbidm2 + award, data=m2)
## # weights:  75 (48 variable)
## initial  value 377.922627 
## iter  10 value 266.148730
## iter  20 value 172.158004
## iter  30 value 155.994029
## iter  40 value 154.422766
## iter  50 value 154.234559
## iter  60 value 154.215803
## iter  70 value 154.206328
## iter  80 value 154.201999
## final  value 154.201869 
## converged
summary(logm20)
## Warning in sqrt(diag(vc)): NaNs produzidos
## Call:
## multinom(formula = statusm2 ~ ITm2 + km2 + tm2 + percentprivm2 + 
##     catpppm2 + sector + debtm2 + catbidm2 + award, data = m2)
## 
## Coefficients:
##            (Intercept)       ITm2         km2       tm2 percentprivm2 catpppm22
## Cancelled    -4.418900 0.03829133 -0.02930562 0.3370595    0.01989920  71.70498
## Distressed   -3.190264 0.03787883 -0.02816970 0.3309051    0.01037829  69.85288
##             catpppm23 catpppm24 sectorICT  sectorMSW  sectorTrp sectorWater
## Cancelled  -0.3440331 -3.684041  3.080200 33.9383745 -0.9609535   1.1063518
## Distressed -1.7454542 -5.861811  3.062471 -0.8154525 -1.5494055   0.1674533
##                  debtm2 catbidm22 catbidm23 catbidm24 catbidm25  catbidm26
## Cancelled  -0.007354351   16.1119  38.85502  6.524841   1.36369   4.925596
## Distressed -0.007816891  -14.3451 -20.76899  8.088845 -41.34901 -19.477142
##             catbidm27 catbidm28 awardCompetitive bidding
## Cancelled    3.672508  6.869917                -3.439337
## Distressed -38.530712  8.747450                -4.680964
##            awardCompetitive negotiation awardDirect negotiation
## Cancelled                     -38.67963                1.595503
## Distressed                    -50.55142                2.204469
##            awardLicense scheme
## Cancelled             49.11250
## Distressed            49.45474
## 
## Std. Errors:
##            (Intercept)        ITm2         km2        tm2 percentprivm2
## Cancelled    0.4799097 0.006727261 0.006650749 0.06089194    0.01557380
## Distressed   0.4762275 0.006827769 0.006751559 0.06193099    0.01612834
##            catpppm22 catpppm23 catpppm24 sectorICT sectorMSW sectorTrp
## Cancelled  0.2962723 0.6255181 0.7667557 0.4957325       NaN 0.5245542
## Distressed 0.2962723 0.6544662 1.0642272 0.5106026       NaN 0.5391721
##            sectorWater      debtm2    catbidm22    catbidm23 catbidm24
## Cancelled    0.6813492 0.007486729 1.772356e-10          NaN 0.4303081
## Distressed   0.7396463 0.007492290 5.748668e-15 6.712961e-27 0.4124719
##               catbidm25    catbidm26    catbidm27 catbidm28
## Cancelled  1.568331e-01 5.893464e-01 1.338475e+00 0.6733672
## Distressed 2.061250e-20 2.530027e-11 2.397360e-18 0.5528887
##            awardCompetitive bidding awardCompetitive negotiation
## Cancelled                 0.6508419                 1.942703e-07
## Distressed                0.7677266                 1.942703e-07
##            awardDirect negotiation awardLicense scheme
## Cancelled                0.5529975           0.3935283
## Distressed               0.5153553           0.3935283
## 
## Residual Deviance: 308.4037 
## AIC: 404.4037

Método Stepwise

O método auxilia em selecionar as variáveis importantes para o modelo.

step1 <- step(logm20, direction = 'both')
## Start:  AIC=404.4
## statusm2 ~ ITm2 + km2 + tm2 + percentprivm2 + catpppm2 + sector + 
##     debtm2 + catbidm2 + award
## 
## trying - ITm2 
## # weights:  72 (46 variable)
## initial  value 377.922627 
## iter  10 value 251.195300
## iter  20 value 179.532584
## iter  30 value 175.010637
## iter  40 value 174.632608
## iter  50 value 174.589944
## iter  60 value 174.585444
## iter  70 value 174.584965
## final  value 174.584962 
## converged
## trying - km2 
## # weights:  72 (46 variable)
## initial  value 377.922627 
## iter  10 value 235.376226
## iter  20 value 167.930356
## iter  30 value 162.860214
## iter  40 value 162.435052
## iter  50 value 162.338045
## iter  60 value 162.337714
## iter  70 value 162.337263
## final  value 162.337143 
## converged
## trying - tm2 
## # weights:  72 (46 variable)
## initial  value 377.922627 
## iter  10 value 271.054031
## iter  20 value 190.795650
## iter  30 value 182.024825
## iter  40 value 181.650946
## iter  50 value 181.512760
## iter  60 value 181.510684
## iter  70 value 181.510446
## final  value 181.510442 
## converged
## trying - percentprivm2 
## # weights:  72 (46 variable)
## initial  value 377.922627 
## iter  10 value 245.849093
## iter  20 value 166.559016
## iter  30 value 155.748381
## iter  40 value 155.484375
## iter  50 value 155.407477
## iter  60 value 155.399380
## iter  70 value 155.391423
## iter  80 value 155.390909
## final  value 155.390896 
## converged
## trying - catpppm2 
## # weights:  66 (42 variable)
## initial  value 377.922627 
## iter  10 value 266.182672
## iter  20 value 177.228864
## iter  30 value 168.530898
## iter  40 value 168.184364
## iter  50 value 168.132584
## iter  60 value 168.096270
## iter  70 value 168.095229
## iter  70 value 168.095228
## iter  70 value 168.095228
## final  value 168.095228 
## converged
## trying - sector 
## # weights:  63 (40 variable)
## initial  value 377.922627 
## iter  10 value 266.158359
## iter  20 value 175.668743
## iter  30 value 161.275274
## iter  40 value 160.749367
## iter  50 value 160.694585
## iter  60 value 160.686216
## iter  70 value 160.684300
## final  value 160.684293 
## converged
## trying - debtm2 
## # weights:  72 (46 variable)
## initial  value 377.922627 
## iter  10 value 233.846879
## iter  20 value 164.163452
## iter  30 value 154.707814
## iter  40 value 154.401308
## iter  50 value 154.325017
## iter  60 value 154.314002
## iter  70 value 154.313084
## final  value 154.313023 
## converged
## trying - catbidm2 
## # weights:  54 (34 variable)
## initial  value 377.922627 
## iter  10 value 266.198030
## iter  20 value 192.396053
## iter  30 value 182.583911
## iter  40 value 182.123550
## iter  50 value 182.089515
## iter  60 value 182.086634
## final  value 182.086487 
## converged
## trying - award 
## # weights:  63 (40 variable)
## initial  value 377.922627 
## iter  10 value 266.148538
## iter  20 value 181.115179
## iter  30 value 171.812337
## iter  40 value 171.488692
## iter  50 value 171.466307
## iter  60 value 171.463965
## final  value 171.463825 
## converged
##                 Df      AIC
## - debtm2        46 400.6260
## - sector        40 401.3686
## - percentprivm2 46 402.7818
## <none>          48 404.4037
## - km2           46 416.6743
## - catpppm2      42 420.1905
## - award         40 422.9276
## - catbidm2      34 432.1730
## - ITm2          46 441.1699
## - tm2           46 455.0209
## # weights:  72 (46 variable)
## initial  value 377.922627 
## iter  10 value 233.846879
## iter  20 value 164.163452
## iter  30 value 154.707814
## iter  40 value 154.401308
## iter  50 value 154.325017
## iter  60 value 154.314002
## iter  70 value 154.313084
## final  value 154.313023 
## converged
## 
## Step:  AIC=400.63
## statusm2 ~ ITm2 + km2 + tm2 + percentprivm2 + catpppm2 + sector + 
##     catbidm2 + award
## 
## trying - ITm2 
## # weights:  69 (44 variable)
## initial  value 377.922627 
## iter  10 value 223.356493
## iter  20 value 177.604971
## iter  30 value 175.102283
## iter  40 value 174.869174
## iter  50 value 174.819611
## iter  60 value 174.817757
## final  value 174.817722 
## converged
## trying - km2 
## # weights:  69 (44 variable)
## initial  value 377.922627 
## iter  10 value 224.739495
## iter  20 value 169.683214
## iter  30 value 162.917068
## iter  40 value 162.703404
## iter  50 value 162.632319
## iter  60 value 162.629732
## final  value 162.629702 
## converged
## trying - tm2 
## # weights:  69 (44 variable)
## initial  value 377.922627 
## iter  10 value 255.152474
## iter  20 value 188.591818
## iter  30 value 182.830257
## iter  40 value 182.556625
## iter  50 value 182.517300
## iter  60 value 182.515484
## final  value 182.515444 
## converged
## trying - percentprivm2 
## # weights:  69 (44 variable)
## initial  value 377.922627 
## iter  10 value 213.199216
## iter  20 value 163.918615
## iter  30 value 155.840865
## iter  40 value 155.609882
## iter  50 value 155.556248
## iter  60 value 155.547857
## iter  70 value 155.547570
## iter  70 value 155.547569
## iter  70 value 155.547569
## final  value 155.547569 
## converged
## trying - catpppm2 
## # weights:  63 (40 variable)
## initial  value 377.922627 
## iter  10 value 232.024555
## iter  20 value 173.021288
## iter  30 value 168.422665
## iter  40 value 168.280768
## iter  50 value 168.241832
## iter  60 value 168.240234
## final  value 168.240210 
## converged
## trying - sector 
## # weights:  60 (38 variable)
## initial  value 377.922627 
## iter  10 value 243.547814
## iter  20 value 168.109952
## iter  30 value 161.034268
## iter  40 value 160.847974
## iter  50 value 160.802625
## iter  60 value 160.800076
## final  value 160.799972 
## converged
## trying - catbidm2 
## # weights:  51 (32 variable)
## initial  value 377.922627 
## iter  10 value 237.432093
## iter  20 value 188.078599
## iter  30 value 183.539031
## iter  40 value 183.297383
## iter  50 value 183.269605
## iter  60 value 183.268975
## iter  60 value 183.268973
## iter  60 value 183.268973
## final  value 183.268973 
## converged
## trying - award 
## # weights:  60 (38 variable)
## initial  value 377.922627 
## iter  10 value 244.672361
## iter  20 value 177.208493
## iter  30 value 171.835823
## iter  40 value 171.659474
## iter  50 value 171.630674
## iter  60 value 171.629715
## final  value 171.629695 
## converged
## trying + debtm2 
## # weights:  75 (48 variable)
## initial  value 377.922627 
## iter  10 value 266.148730
## iter  20 value 172.158004
## iter  30 value 155.994029
## iter  40 value 154.422766
## iter  50 value 154.234559
## iter  60 value 154.215803
## iter  70 value 154.206328
## iter  80 value 154.201999
## final  value 154.201869 
## converged
##                 Df      AIC
## - sector        38 397.5999
## - percentprivm2 44 399.0951
## <none>          46 400.6260
## + +debtm2       48 404.4037
## - km2           44 413.2594
## - catpppm2      40 416.4804
## - award         38 419.2594
## - catbidm2      32 430.5379
## - ITm2          44 437.6354
## - tm2           44 453.0309
## # weights:  60 (38 variable)
## initial  value 377.922627 
## iter  10 value 243.547814
## iter  20 value 168.109952
## iter  30 value 161.034268
## iter  40 value 160.847974
## iter  50 value 160.802625
## iter  60 value 160.800076
## final  value 160.799972 
## converged
## 
## Step:  AIC=397.6
## statusm2 ~ ITm2 + km2 + tm2 + percentprivm2 + catpppm2 + catbidm2 + 
##     award
## 
## trying - ITm2 
## # weights:  57 (36 variable)
## initial  value 377.922627 
## iter  10 value 221.775094
## iter  20 value 182.491274
## iter  30 value 181.578035
## iter  40 value 181.482173
## iter  50 value 181.478400
## final  value 181.478153 
## converged
## trying - km2 
## # weights:  57 (36 variable)
## initial  value 377.922627 
## iter  10 value 224.757696
## iter  20 value 172.662936
## iter  30 value 170.184042
## iter  40 value 170.045424
## iter  50 value 170.017636
## final  value 170.017331 
## converged
## trying - tm2 
## # weights:  57 (36 variable)
## initial  value 377.922627 
## iter  10 value 256.820767
## iter  20 value 198.531288
## iter  30 value 197.420177
## iter  40 value 197.246957
## iter  50 value 197.227381
## iter  60 value 197.227086
## iter  60 value 197.227084
## iter  60 value 197.227084
## final  value 197.227084 
## converged
## trying - percentprivm2 
## # weights:  57 (36 variable)
## initial  value 377.922627 
## iter  10 value 227.468521
## iter  20 value 166.129864
## iter  30 value 161.985316
## iter  40 value 161.814732
## iter  50 value 161.789872
## iter  60 value 161.787493
## final  value 161.787467 
## converged
## trying - catpppm2 
## # weights:  51 (32 variable)
## initial  value 377.922627 
## iter  10 value 231.711999
## iter  20 value 173.605898
## iter  30 value 171.789749
## iter  40 value 171.656003
## iter  50 value 171.652786
## final  value 171.652543 
## converged
## trying - catbidm2 
## # weights:  39 (24 variable)
## initial  value 377.922627 
## iter  10 value 244.147803
## iter  20 value 195.249904
## iter  30 value 194.142548
## iter  40 value 194.034094
## iter  50 value 194.032352
## iter  50 value 194.032352
## final  value 194.032352 
## converged
## trying - award 
## # weights:  48 (30 variable)
## initial  value 377.922627 
## iter  10 value 241.308359
## iter  20 value 183.080527
## iter  30 value 181.051475
## iter  40 value 180.948079
## iter  50 value 180.945429
## final  value 180.945389 
## converged
## trying + sector 
## # weights:  72 (46 variable)
## initial  value 377.922627 
## iter  10 value 233.846879
## iter  20 value 164.163452
## iter  30 value 154.707814
## iter  40 value 154.401308
## iter  50 value 154.325017
## iter  60 value 154.314002
## iter  70 value 154.313084
## final  value 154.313023 
## converged
## trying + debtm2 
## # weights:  63 (40 variable)
## initial  value 377.922627 
## iter  10 value 266.158359
## iter  20 value 175.668743
## iter  30 value 161.275274
## iter  40 value 160.749367
## iter  50 value 160.694585
## iter  60 value 160.686216
## iter  70 value 160.684300
## final  value 160.684293 
## converged
##                 Df      AIC
## - percentprivm2 36 395.5749
## <none>          38 397.5999
## + +sector       46 400.6260
## + +debtm2       40 401.3686
## - catpppm2      32 407.3051
## - km2           36 412.0347
## - award         30 421.8908
## - ITm2          36 434.9563
## - catbidm2      24 436.0647
## - tm2           36 466.4542
## # weights:  57 (36 variable)
## initial  value 377.922627 
## iter  10 value 227.468521
## iter  20 value 166.129864
## iter  30 value 161.985316
## iter  40 value 161.814732
## iter  50 value 161.789872
## iter  60 value 161.787493
## final  value 161.787467 
## converged
## 
## Step:  AIC=395.57
## statusm2 ~ ITm2 + km2 + tm2 + catpppm2 + catbidm2 + award
## 
## trying - ITm2 
## # weights:  54 (34 variable)
## initial  value 377.922627 
## iter  10 value 214.352497
## iter  20 value 183.224284
## iter  30 value 182.574113
## iter  40 value 182.453938
## iter  50 value 182.451143
## final  value 182.451118 
## converged
## trying - km2 
## # weights:  54 (34 variable)
## initial  value 377.922627 
## iter  10 value 211.725910
## iter  20 value 173.563821
## iter  30 value 170.981479
## iter  40 value 170.846848
## iter  50 value 170.836472
## final  value 170.836390 
## converged
## trying - tm2 
## # weights:  54 (34 variable)
## initial  value 377.922627 
## iter  10 value 242.402374
## iter  20 value 202.712181
## iter  30 value 200.574047
## iter  40 value 200.386422
## iter  50 value 200.372228
## final  value 200.372133 
## converged
## trying - catpppm2 
## # weights:  48 (30 variable)
## initial  value 377.922627 
## iter  10 value 239.612200
## iter  20 value 173.103508
## iter  30 value 172.386505
## iter  40 value 172.300856
## iter  50 value 172.298286
## final  value 172.298277 
## converged
## trying - catbidm2 
## # weights:  36 (22 variable)
## initial  value 377.922627 
## iter  10 value 232.353776
## iter  20 value 197.498267
## iter  30 value 196.480177
## iter  40 value 196.348113
## final  value 196.345705 
## converged
## trying - award 
## # weights:  45 (28 variable)
## initial  value 377.922627 
## iter  10 value 242.357597
## iter  20 value 185.679286
## iter  30 value 184.146352
## iter  40 value 184.062300
## iter  50 value 184.059929
## final  value 184.059918 
## converged
## trying + percentprivm2 
## # weights:  60 (38 variable)
## initial  value 377.922627 
## iter  10 value 243.547814
## iter  20 value 168.109952
## iter  30 value 161.034268
## iter  40 value 160.847974
## iter  50 value 160.802625
## iter  60 value 160.800076
## final  value 160.799972 
## converged
## trying + sector 
## # weights:  69 (44 variable)
## initial  value 377.922627 
## iter  10 value 213.199216
## iter  20 value 163.918615
## iter  30 value 155.840865
## iter  40 value 155.609882
## iter  50 value 155.556248
## iter  60 value 155.547857
## iter  70 value 155.547570
## iter  70 value 155.547569
## iter  70 value 155.547569
## final  value 155.547569 
## converged
## trying + debtm2 
## # weights:  60 (38 variable)
## initial  value 377.922627 
## iter  10 value 233.210067
## iter  20 value 165.965814
## iter  30 value 161.910680
## iter  40 value 161.704706
## iter  50 value 161.661635
## iter  60 value 161.660435
## iter  70 value 161.660066
## final  value 161.660057 
## converged
##                  Df      AIC
## <none>           36 395.5749
## + +percentprivm2 38 397.5999
## + +sector        44 399.0951
## + +debtm2        38 399.3201
## - catpppm2       30 404.5966
## - km2            34 409.6728
## - award          28 424.1198
## - ITm2           34 432.9022
## - catbidm2       22 436.6914
## - tm2            34 468.7443

Método sugeriu a retirada de \(debtm2\) e \(percentprivm2\)

Modelo estimado sem \(debtm2\) e \(percentprivm2\)

logm2 = multinom(statusm2 ~ ITm2 + km2 + tm2 + catpppm2 + sector + catbidm2 + award, data=m2)
## # weights:  69 (44 variable)
## initial  value 377.922627 
## iter  10 value 213.199216
## iter  20 value 163.918615
## iter  30 value 155.840865
## iter  40 value 155.609882
## iter  50 value 155.556248
## iter  60 value 155.547857
## iter  70 value 155.547570
## iter  70 value 155.547569
## iter  70 value 155.547569
## final  value 155.547569 
## converged
summary(logm2)
## Call:
## multinom(formula = statusm2 ~ ITm2 + km2 + tm2 + catpppm2 + sector + 
##     catbidm2 + award, data = m2)
## 
## Coefficients:
##            (Intercept)       ITm2         km2       tm2 catpppm22  catpppm23
## Cancelled    -4.224918 0.03751531 -0.02842901 0.3121012  92.44691 -0.1908299
## Distressed   -3.788915 0.03702137 -0.02732782 0.3039021  90.72949 -1.6160153
##            catpppm24 sectorICT sectorMSW  sectorTrp sectorWater catbidm22
## Cancelled  -3.501328  2.940868 33.012360 -0.7770645   1.1125687  13.70522
## Distressed -5.821229  2.878987 -2.247188 -1.4861147   0.1654327 -12.58906
##            catbidm23 catbidm24  catbidm25 catbidm26  catbidm27 catbidm28
## Cancelled   27.53328  6.339004   1.329585   4.95275   3.634015  6.703867
## Distressed -13.11049  7.967968 -24.293123 -11.12637 -16.693422  8.473917
##            awardCompetitive bidding awardCompetitive negotiation
## Cancelled                 -3.172628                    -9.874664
## Distressed                -4.455466                   -42.765788
##            awardDirect negotiation awardLicense scheme
## Cancelled                 1.433699            48.43313
## Distressed                2.105192            48.67427
## 
## Std. Errors:
##            (Intercept)        ITm2         km2        tm2 catpppm22 catpppm23
## Cancelled    0.5539088 0.006547832 0.006535997 0.05496807 0.2893222 0.6486896
## Distressed   0.5683248 0.006613142 0.006623136 0.05598903 0.2893222 0.6914530
##            catpppm24 sectorICT    sectorMSW sectorTrp sectorWater    catbidm22
## Cancelled  0.7434367 0.5056660 4.675874e-15 0.6018174   0.7333978 3.395510e-09
## Distressed 1.0611400 0.5238442 1.173746e-19 0.6396489   0.8015557 4.266684e-13
##               catbidm23 catbidm24    catbidm25    catbidm26    catbidm27
## Cancelled  7.223096e-14 0.4530643 1.865790e-01 1.468225e+00 1.370490e+00
## Distressed 1.353251e-18 0.4508169 8.446196e-13 2.556969e-07 7.758955e-09
##            catbidm28 awardCompetitive bidding awardCompetitive negotiation
## Cancelled  0.7038888                0.6385195                 9.889307e-07
## Distressed 0.7193883                0.7498548                 1.269093e-16
##            awardDirect negotiation awardLicense scheme
## Cancelled                0.5297004           0.3901919
## Distressed               0.5010326           0.3901919
## 
## Residual Deviance: 311.0951 
## AIC: 399.0951

Análise da razão estatística de máxima verossimilhança (LR statistic)

lr_test <- lrtest(logm2)
## # weights:  6 (2 variable)
## initial  value 377.922627 
## final  value 348.419556 
## converged
print(lr_test)
## Likelihood ratio test
## 
## Model 1: statusm2 ~ ITm2 + km2 + tm2 + catpppm2 + sector + catbidm2 + 
##     award
## Model 2: statusm2 ~ 1
##   #Df  LogLik  Df  Chisq Pr(>Chisq)    
## 1  44 -155.55                          
## 2   2 -348.42 -42 385.74  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#coef_test(logm1)
# Calcular a log-verossimilhança do modelo completo (logm1)
log_likelihood_model <- logLik(logm2)

# Calcular a log-verossimilhança do modelo nulo (intercept apenas)
# Você pode criar um modelo nulo usando a função multinom com apenas ~ 1
null_model <- multinom(statusm2 ~ 1, data = m2)
## # weights:  6 (2 variable)
## initial  value 377.922627 
## final  value 348.419556 
## converged
log_likelihood_null <- logLik(null_model)

# Calcular o R² de McFadden
R2_McFadden <- 1 - (log_likelihood_model / log_likelihood_null)

# Exibir o resultado
print(R2_McFadden)
## 'log Lik.' 0.5535625 (df=44)

O modelo possui um ajuste melhor que o modelo com dados completos.

z = summary(logm2)$coefficients/summary(logm2)$standard.errors

z
##            (Intercept)     ITm2       km2      tm2 catpppm22  catpppm23
## Cancelled    -7.627461 5.729424 -4.349605 5.677862  319.5293 -0.2941775
## Distressed   -6.666812 5.598151 -4.126114 5.427886  313.5932 -2.3371298
##            catpppm24 sectorICT     sectorMSW sectorTrp sectorWater
## Cancelled  -4.709652  5.815830  7.060148e+15 -1.291196   1.5170058
## Distressed -5.485826  5.495884 -1.914544e+19 -2.323329   0.2063895
##                catbidm22     catbidm23 catbidm24     catbidm25     catbidm26
## Cancelled   4.036277e+09  3.811840e+14  13.99140  7.126122e+00  3.373292e+00
## Distressed -2.950549e+13 -9.688142e+18  17.67451 -2.876221e+13 -4.351390e+07
##                catbidm27 catbidm28 awardCompetitive bidding
## Cancelled   2.651618e+00  9.524044                -4.968726
## Distressed -2.151504e+09 11.779336                -5.941772
##            awardCompetitive negotiation awardDirect negotiation
## Cancelled                 -9.985193e+06                2.706621
## Distressed                -3.369791e+17                4.201706
##            awardLicense scheme
## Cancelled             124.1265
## Distressed            124.7444
# 2-tailed z test
p <- (1 - pnorm(abs(z), 0, 1)) * 2
 
p
##             (Intercept)         ITm2          km2          tm2 catpppm22
## Cancelled  2.398082e-14 1.007722e-08 1.363828e-05 1.363883e-08         0
## Distressed 2.614198e-11 2.166496e-08 3.689445e-05 5.702557e-08         0
##             catpppm23    catpppm24   sectorICT sectorMSW  sectorTrp sectorWater
## Cancelled  0.76862226 2.481396e-06 6.03337e-09         0 0.19663558   0.1292652
## Distressed 0.01943243 4.115421e-08 3.88757e-08         0 0.02016149   0.8364866
##            catbidm22 catbidm23 catbidm24    catbidm25    catbidm26  catbidm27
## Cancelled          0         0         0 1.032285e-12 0.0007427518 0.00801072
## Distressed         0         0         0 0.000000e+00 0.0000000000 0.00000000
##            catbidm28 awardCompetitive bidding awardCompetitive negotiation
## Cancelled          0             6.739435e-07                            0
## Distressed         0             2.819575e-09                            0
##            awardDirect negotiation awardLicense scheme
## Cancelled             6.797172e-03                   0
## Distressed            2.649108e-05                   0
##Razão de chances?
## extract the coefficients from the model and exponentiate
exp(coef(logm2))
##            (Intercept)     ITm2       km2      tm2    catpppm22 catpppm23
## Cancelled   0.01462653 1.038228 0.9719713 1.366293 1.409885e+40 0.8262731
## Distressed  0.02262014 1.037715 0.9730422 1.355136 2.531147e+39 0.1986888
##              catpppm24 sectorICT   sectorMSW sectorTrp sectorWater    catbidm22
## Cancelled  0.030157295  18.93227 2.17313e+14 0.4597536    3.042163 8.955719e+05
## Distressed 0.002963959  17.79624 1.05696e-01 0.2262500    1.179904 3.409103e-06
##               catbidm23 catbidm24    catbidm25    catbidm26    catbidm27
## Cancelled  9.068871e+11   566.232 3.779475e+00 1.415637e+02 3.786452e+01
## Distressed 2.023884e-06  2886.985 2.815989e-11 1.471906e-05 5.625215e-08
##            catbidm28 awardCompetitive bidding awardCompetitive negotiation
## Cancelled   815.5538               0.04189336                 5.146216e-05
## Distressed 4788.2328               0.01161490                 2.673341e-19
##            awardDirect negotiation awardLicense scheme
## Cancelled                 4.194183        1.082039e+21
## Distressed                8.208676        1.377104e+21

\(%A razão de chances entre o contrato ser cancelado e concluido considerando a variável Investimento total, ou seja, existe 0,99 chances de o contrato ser cancelado ao invés de concluido.\)

# Dados para a categoria Cancelled
cancelled_results <- data.frame(
  Variable = colnames(exp(coef(logm2))),
  Odds_Ratio = exp(coef(logm2)["Cancelled", ]),
  P_Value = p["Cancelled", ]
)

# Exibir a tabela
print(cancelled_results)#, n=23) 
##                                                  Variable   Odds_Ratio
## (Intercept)                                   (Intercept) 1.462653e-02
## ITm2                                                 ITm2 1.038228e+00
## km2                                                   km2 9.719713e-01
## tm2                                                   tm2 1.366293e+00
## catpppm22                                       catpppm22 1.409885e+40
## catpppm23                                       catpppm23 8.262731e-01
## catpppm24                                       catpppm24 3.015730e-02
## sectorICT                                       sectorICT 1.893227e+01
## sectorMSW                                       sectorMSW 2.173130e+14
## sectorTrp                                       sectorTrp 4.597536e-01
## sectorWater                                   sectorWater 3.042163e+00
## catbidm22                                       catbidm22 8.955719e+05
## catbidm23                                       catbidm23 9.068871e+11
## catbidm24                                       catbidm24 5.662320e+02
## catbidm25                                       catbidm25 3.779475e+00
## catbidm26                                       catbidm26 1.415637e+02
## catbidm27                                       catbidm27 3.786452e+01
## catbidm28                                       catbidm28 8.155538e+02
## awardCompetitive bidding         awardCompetitive bidding 4.189336e-02
## awardCompetitive negotiation awardCompetitive negotiation 5.146216e-05
## awardDirect negotiation           awardDirect negotiation 4.194183e+00
## awardLicense scheme                   awardLicense scheme 1.082039e+21
##                                   P_Value
## (Intercept)                  2.398082e-14
## ITm2                         1.007722e-08
## km2                          1.363828e-05
## tm2                          1.363883e-08
## catpppm22                    0.000000e+00
## catpppm23                    7.686223e-01
## catpppm24                    2.481396e-06
## sectorICT                    6.033370e-09
## sectorMSW                    0.000000e+00
## sectorTrp                    1.966356e-01
## sectorWater                  1.292652e-01
## catbidm22                    0.000000e+00
## catbidm23                    0.000000e+00
## catbidm24                    0.000000e+00
## catbidm25                    1.032285e-12
## catbidm26                    7.427518e-04
## catbidm27                    8.010720e-03
## catbidm28                    0.000000e+00
## awardCompetitive bidding     6.739435e-07
## awardCompetitive negotiation 0.000000e+00
## awardDirect negotiation      6.797172e-03
## awardLicense scheme          0.000000e+00

As variáveis \(catppm23\) Greenfield project, setor de transportes \(sectorTrp\) e setor de água e esgoto \(sectorWater\) são estatísticamente insgnificantes nos efeitos \(P(\frac{cancelled}{concluded})\).

A variável Investimento total \(ITm1\) possui efeito similar aos dados completos, ou seja, um efeito muito pequeno sobre a probabilidade de cancelamento.

\(tm1\) tempo de contrato possui efeito bem similar ao obervado nos dados completos, aumentando cerca de 31% a chance de cancelamento do contrato quando aumentando uma unidade do período de contrato.

Assim como nos dados completos a variável \(catpppm4\) (Management and Lease Contracts) possui mais influência para o evento ser concluido do que cancelado.

As variáveis setorias neste teste de robustez seguem a mesma ordem de influencia sobre o status dos contratos, contudo as variáveis \(sectorTrp\) e \(sectorWater\) não possuem significância estatística a 5%, mas ainda mantendo ordem de chance semelhante. A variável \(sectorICT\) continuou tendo maior chance de ser cancelada, porém a ordem da chance teve sua escala aumentada de 4.62 para 18.93.

Todos os efeitos nas variáveis categóricas bid mantiveram maior chance de cancelamento, modificando apenas a escala, sendo todas elas significante estatísticamente.

Nas variáveis categorica de adjudicação do contrato, apenas \(awardCompetitivenegociation\) teve mudança de posição, onde a mesma possui uma grande chance de ser conluida versus o cancelamento, em contraste com o resultado dos dados completos que possuiam 2 vezes mais chances de ser cancelada.

Distressed

# Dados para a categoria Distressed
distressed_results <- data.frame(
  Variable = colnames(exp(coef(logm2))),
  Odds_Ratio = exp(coef(logm2)["Distressed", ]),
  P_Value = p["Distressed", ]
)

# Exibir a tabela
distressed_results

Todas as variáveis presentes na categoria Distressed possuem a mesma direção de chance quando comparadas aos dados completos, mudando apenas a ordem de grandeza da razão de chances. Nessa estimação apenas a variável \(sectorWater\) não possui significância estatística.

# Obter as probabilidades estimadas
pp <- fitted(logm2)

# Definir um limite de probabilidade (por exemplo, 0,5)
limite_probabilidade <- 0.5

# Classificar as observações com base nas probabilidades
classificacao_estimada <- apply(pp, 1, function(x) {
  ifelse(max(x) >= limite_probabilidade, which.max(x), NA)
})

# Adicionar a classificação estimada ao conjunto de dados original
m2$classificacao_estimada <- classificacao_estimada


# Comparar com a classificação real
tabela_comparacao <- table(statusm2, m2$classificacao_estimada)
print(tabela_comparacao)
##             
## statusm2       1   2   3
##   Concluded  102   3   0
##   Cancelled    5 156  10
##   Distressed   2  36  21

Coluna 1: A categoria real da variável dependente.

Coluna 2: A categoria estimada pelo modelo.

Coluna 3: O número de observações que foram classificadas corretamente pelo modelo.

# Crie a tabela de comparação
tabela_comparacao <- table(statusm2, m2$classificacao_estimada)

# Calcule o número total de observações em cada categoria real
total_real <- rowSums(tabela_comparacao)

# Calcule o número total de observações estimadas em cada categoria
total_estimado <- colSums(tabela_comparacao)

# Calcule a taxa de precisão (percentual de acerto) para cada categoria
taxa_precisao <- diag(tabela_comparacao) / total_real * 100

# Calcule a taxa de precisão global (total)
taxa_precisao_global <- sum(diag(tabela_comparacao)) / sum(total_real) * 100

# Adicione o total estimado à tabela de comparação
tabela_comparacao <- rbind(tabela_comparacao, Total_Estimado = total_estimado)

# Adicione a taxa de precisão à tabela de comparação
tabela_comparacao <- cbind(tabela_comparacao, Taxa_Precisao = c(taxa_precisao, Taxa_Precisao_Global = taxa_precisao_global))

# Renomeie a última coluna
colnames(tabela_comparacao)[ncol(tabela_comparacao)] <- "Taxa de Precisão (%)"

# Imprima a tabela
print(tabela_comparacao)
##                  1   2  3 Taxa de Precisão (%)
## Concluded      102   3  0             97.14286
## Cancelled        5 156 10             91.22807
## Distressed       2  36 21             35.59322
## Total_Estimado 109 195 31             83.28358

A matriz de confusão apresentada reflete o desempenho de um modelo de classificação multiclasse. Conclusões por Classe:

Concluded (1): O modelo acertou 102 previsões desta classe, com uma taxa de precisão de 97.14%. Houve 3 casos que foram erroneamente classificados como ‘Cancelled’ e nenhum como ‘Distressed’.

Cancelled (2): Foram corretamente previstas 156 observações desta classe, com uma taxa de precisão de 91.23%. No entanto, o modelo cometeu erros ao classificar 5 casos como ‘Concluded’ e 10 como ‘Distressed’.

Distressed (3): O modelo acertou 21 previsões para esta classe, mas com uma taxa de precisão relativamente baixa de 35.59%. Houve 2 observações erroneamente classificadas como ‘Concluded’ e 36 como ‘Cancelled’.

Total Estimado: O total de estimativas mostra a soma das previsões do modelo para cada classe. Por exemplo, 109 observações foram previstas como ‘Concluded’, 195 como ‘Cancelled’ e 31 como ‘Distressed’.

Curva ROC

A curva ROC (Receiver Operating Characteristic) é usada para avaliar a capacidade preditiva de um modelo em relação à classificação binária. No entanto, ela pode ser adaptada para modelos de classificação multinomial usando várias estratégias, como a análise one-vs-rest ou one-vs-one.

A estratégia comum para criar uma curva ROC em um modelo logit multinomial:

O problema foi transformado em vários problemas de classificação binária. Isso foi feito treinando o modelo para cada classe em relação a todas as outras classes combinadas. Utilizando a métrica de interesse (como a taxa de verdadeiros positivos e a taxa de falsos positivos) para avaliar cada modelo de classificação binária resultante. A curva ROC com base nessas métricas para cada classe:

# Transformar o problema em vários problemas de classificação binária
# Supondo que "logm2" é o objeto que representa o modelo logit multinomial
# e "m2" é o conjunto de dados correspondente

# Adicione um pacote para trabalhar com a lógica de classificação


# Crie uma função para ajustar modelos one-vs-rest
fit_one_vs_rest <- function(class_label) {
  binary_data <- m2
  binary_data$target <- ifelse(statusm2 == class_label, 1, 0)
  binary_model <- glm(target ~ ITm2 + km2 + tm2 + percentprivm2 + catpppm2 + sector + debtm2 + catbidm2, data = binary_data, family = binomial)
  return(binary_model)
}

# Ajuste um modelo para cada classe
class_labels <- unique(statusm2)
binary_models <- lapply(class_labels, fit_one_vs_rest)
## Warning: glm.fit: probabilidades ajustadas numericamente 0 ou 1 ocorreu
# Calcular as métricas para cada modelo binário
roc_list <- lapply(binary_models, function(model) {
  roc_obj <- roc(statusm2, predict(model, type = "response"))
  return(roc_obj)
})
## Warning in roc.default(statusm2, predict(model, type = "response")): 'response'
## has more than two levels. Consider setting 'levels' explicitly or using
## 'multiclass.roc' instead
## Setting levels: control = Concluded, case = Cancelled
## Setting direction: controls < cases
## Warning in roc.default(statusm2, predict(model, type = "response")): 'response'
## has more than two levels. Consider setting 'levels' explicitly or using
## 'multiclass.roc' instead
## Setting levels: control = Concluded, case = Cancelled
## Setting direction: controls > cases
## Warning in roc.default(statusm2, predict(model, type = "response")): 'response'
## has more than two levels. Consider setting 'levels' explicitly or using
## 'multiclass.roc' instead
## Setting levels: control = Concluded, case = Cancelled
## Setting direction: controls < cases
# Plotar a curva ROC
plot(roc_list[[1]], col = 1, main = "Curvas ROC para o modelo logit multinomial")
for (i in 2:length(roc_list)) {
  lines(roc_list[[i]], col = i)
}
legend("bottomright", legend = class_labels, col = 1:length(roc_list), lty = 1)

A categoria Cancelled possui melhor medida de classificação considerando este exercício de robustez do que com os dados completos. O que também se reflete nas probabilidades estimadas abaixo.

Probabilidades estimadas

head(pp <- fitted(logm2)) 
##     Concluded Cancelled Distressed
## 1 0.026532102 0.6441932  0.3292747
## 2 0.025751525 0.6442726  0.3299759
## 3 0.004397499 0.6660511  0.3295514
## 4 0.003187578 0.8875625  0.1092499
## 5 0.005507103 0.8889833  0.1055096
## 6 0.001471664 0.8634406  0.1350877

Conclusão

De maneira geral o teste de robustez valida os resultados do modelo principal com os dados de todas as categorias de contrato.