# "limpar" ambiente
rm(list = ls())
# Carregar bibliotecas
library(readxl)
library(readr)
library(mfx)
library(caret)
library(pROC)
library(ResourceSelection)
library(modEvA)
library(foreign)
library(stargazer) #Hlavac, Marek (2022)
library(ggplot2)
library(glm2)
library(PerformanceAnalytics)
library(nnet) #logit multinomial
library(car)
library(reshape2)
library(lmtest)
library(MASS)
library(pROC)
library(dplyr)
library(margins)
library(broom)
Foi retirado a categoria Active da base de informações afim de testar se os resultados do modelo permanecem de maneira similar aos dados originais, afim de atestar o poder preditivo do modelo estimado.
m2 <- read_excel("C:/Users/kassy/OneDrive - caen.ufc.br/1. CAEN/3. Doutorado/Thesis/Test banco mundial/corrigidaBDsemactive.xlsx")
#with(m2, table(m2$catstatus, m2$`Project status`))
# Crie uma tabela cruzada com as contagens
tabela_contagem2 <- table(m2$`Project status`)
# Crie um data frame com a contagem e os percentuais
tabela_completa2 <- data.frame(Contagem = tabela_contagem2, Percentual = prop.table(tabela_contagem2) * 100)
# Exiba a tabela
print(tabela_completa2)
## Contagem.Var1 Contagem.Freq Percentual.Var1 Percentual.Freq
## 1 Cancelled 176 Cancelled 51.16279
## 2 Concluded 107 Concluded 31.10465
## 3 Distressed 61 Distressed 17.73256
\(ln(\frac{P(status=Cancelled)}{P(status=Concluded)})\), \(ln(\frac{P(status=Distressed)}{P(status=Concluded)})\)
m2$`Project status` <- as.factor(m2$`Project status`)
m2$`Project status` <- relevel(m2$`Project status`, ref = "Concluded")
is.factor(m2$`Project status`)
## [1] TRUE
ITm2=m2$TotalInvestment1
percentprivm2=m2$PercentPrivate
tm2=m2$ContractPeriod
km2=m2$PhysicalAssets
debtm2=m2$TotalDebtFunding
#catstatusm2=m2$catstatus
statusm2=m2$`Project status`
catpppm2=m2$catppp
#catsubtypem2=m2$catsubtype
#catsetorm2=m2$catsetor
#catsetor2m2=m2$catsetor2
#catsetorm2=m2$catsetor3
catbidm2 = m2$catbid
award = m2$AwardMethod
award = factor(award)
award <- relevel(award, ref = "Not Available")
m2$sectorR <- as.factor(m2$sectorR)
m2$sectorR <- relevel(m2$sectorR, ref = "Enrg")
is.factor(m2$sectorR)
## [1] TRUE
# numericamente
sector = m2$sectorR
catpppm2 = factor(catpppm2)
#catsetorm2 = factor(catsetorm2)
#catsetor2m2 = factor(catsetor2m2)
#catsetorm2 = factor(catsetorm2)
#catsubtypem2 = factor(catsubtypem2)
catbidm2 = factor(catbidm2)
#catstatusm2 = factor(catstatusm2)
#status=m2$`Project status`
logm20 = multinom(statusm2 ~ ITm2 + km2 + tm2 + percentprivm2 + catpppm2 + sector + debtm2 + catbidm2 + award, data=m2)
## # weights: 75 (48 variable)
## initial value 377.922627
## iter 10 value 266.148730
## iter 20 value 172.158004
## iter 30 value 155.994029
## iter 40 value 154.422766
## iter 50 value 154.234559
## iter 60 value 154.215803
## iter 70 value 154.206328
## iter 80 value 154.201999
## final value 154.201869
## converged
summary(logm20)
## Warning in sqrt(diag(vc)): NaNs produzidos
## Call:
## multinom(formula = statusm2 ~ ITm2 + km2 + tm2 + percentprivm2 +
## catpppm2 + sector + debtm2 + catbidm2 + award, data = m2)
##
## Coefficients:
## (Intercept) ITm2 km2 tm2 percentprivm2 catpppm22
## Cancelled -4.418900 0.03829133 -0.02930562 0.3370595 0.01989920 71.70498
## Distressed -3.190264 0.03787883 -0.02816970 0.3309051 0.01037829 69.85288
## catpppm23 catpppm24 sectorICT sectorMSW sectorTrp sectorWater
## Cancelled -0.3440331 -3.684041 3.080200 33.9383745 -0.9609535 1.1063518
## Distressed -1.7454542 -5.861811 3.062471 -0.8154525 -1.5494055 0.1674533
## debtm2 catbidm22 catbidm23 catbidm24 catbidm25 catbidm26
## Cancelled -0.007354351 16.1119 38.85502 6.524841 1.36369 4.925596
## Distressed -0.007816891 -14.3451 -20.76899 8.088845 -41.34901 -19.477142
## catbidm27 catbidm28 awardCompetitive bidding
## Cancelled 3.672508 6.869917 -3.439337
## Distressed -38.530712 8.747450 -4.680964
## awardCompetitive negotiation awardDirect negotiation
## Cancelled -38.67963 1.595503
## Distressed -50.55142 2.204469
## awardLicense scheme
## Cancelled 49.11250
## Distressed 49.45474
##
## Std. Errors:
## (Intercept) ITm2 km2 tm2 percentprivm2
## Cancelled 0.4799097 0.006727261 0.006650749 0.06089194 0.01557380
## Distressed 0.4762275 0.006827769 0.006751559 0.06193099 0.01612834
## catpppm22 catpppm23 catpppm24 sectorICT sectorMSW sectorTrp
## Cancelled 0.2962723 0.6255181 0.7667557 0.4957325 NaN 0.5245542
## Distressed 0.2962723 0.6544662 1.0642272 0.5106026 NaN 0.5391721
## sectorWater debtm2 catbidm22 catbidm23 catbidm24
## Cancelled 0.6813492 0.007486729 1.772356e-10 NaN 0.4303081
## Distressed 0.7396463 0.007492290 5.748668e-15 6.712961e-27 0.4124719
## catbidm25 catbidm26 catbidm27 catbidm28
## Cancelled 1.568331e-01 5.893464e-01 1.338475e+00 0.6733672
## Distressed 2.061250e-20 2.530027e-11 2.397360e-18 0.5528887
## awardCompetitive bidding awardCompetitive negotiation
## Cancelled 0.6508419 1.942703e-07
## Distressed 0.7677266 1.942703e-07
## awardDirect negotiation awardLicense scheme
## Cancelled 0.5529975 0.3935283
## Distressed 0.5153553 0.3935283
##
## Residual Deviance: 308.4037
## AIC: 404.4037
O método auxilia em selecionar as variáveis importantes para o modelo.
step1 <- step(logm20, direction = 'both')
## Start: AIC=404.4
## statusm2 ~ ITm2 + km2 + tm2 + percentprivm2 + catpppm2 + sector +
## debtm2 + catbidm2 + award
##
## trying - ITm2
## # weights: 72 (46 variable)
## initial value 377.922627
## iter 10 value 251.195300
## iter 20 value 179.532584
## iter 30 value 175.010637
## iter 40 value 174.632608
## iter 50 value 174.589944
## iter 60 value 174.585444
## iter 70 value 174.584965
## final value 174.584962
## converged
## trying - km2
## # weights: 72 (46 variable)
## initial value 377.922627
## iter 10 value 235.376226
## iter 20 value 167.930356
## iter 30 value 162.860214
## iter 40 value 162.435052
## iter 50 value 162.338045
## iter 60 value 162.337714
## iter 70 value 162.337263
## final value 162.337143
## converged
## trying - tm2
## # weights: 72 (46 variable)
## initial value 377.922627
## iter 10 value 271.054031
## iter 20 value 190.795650
## iter 30 value 182.024825
## iter 40 value 181.650946
## iter 50 value 181.512760
## iter 60 value 181.510684
## iter 70 value 181.510446
## final value 181.510442
## converged
## trying - percentprivm2
## # weights: 72 (46 variable)
## initial value 377.922627
## iter 10 value 245.849093
## iter 20 value 166.559016
## iter 30 value 155.748381
## iter 40 value 155.484375
## iter 50 value 155.407477
## iter 60 value 155.399380
## iter 70 value 155.391423
## iter 80 value 155.390909
## final value 155.390896
## converged
## trying - catpppm2
## # weights: 66 (42 variable)
## initial value 377.922627
## iter 10 value 266.182672
## iter 20 value 177.228864
## iter 30 value 168.530898
## iter 40 value 168.184364
## iter 50 value 168.132584
## iter 60 value 168.096270
## iter 70 value 168.095229
## iter 70 value 168.095228
## iter 70 value 168.095228
## final value 168.095228
## converged
## trying - sector
## # weights: 63 (40 variable)
## initial value 377.922627
## iter 10 value 266.158359
## iter 20 value 175.668743
## iter 30 value 161.275274
## iter 40 value 160.749367
## iter 50 value 160.694585
## iter 60 value 160.686216
## iter 70 value 160.684300
## final value 160.684293
## converged
## trying - debtm2
## # weights: 72 (46 variable)
## initial value 377.922627
## iter 10 value 233.846879
## iter 20 value 164.163452
## iter 30 value 154.707814
## iter 40 value 154.401308
## iter 50 value 154.325017
## iter 60 value 154.314002
## iter 70 value 154.313084
## final value 154.313023
## converged
## trying - catbidm2
## # weights: 54 (34 variable)
## initial value 377.922627
## iter 10 value 266.198030
## iter 20 value 192.396053
## iter 30 value 182.583911
## iter 40 value 182.123550
## iter 50 value 182.089515
## iter 60 value 182.086634
## final value 182.086487
## converged
## trying - award
## # weights: 63 (40 variable)
## initial value 377.922627
## iter 10 value 266.148538
## iter 20 value 181.115179
## iter 30 value 171.812337
## iter 40 value 171.488692
## iter 50 value 171.466307
## iter 60 value 171.463965
## final value 171.463825
## converged
## Df AIC
## - debtm2 46 400.6260
## - sector 40 401.3686
## - percentprivm2 46 402.7818
## <none> 48 404.4037
## - km2 46 416.6743
## - catpppm2 42 420.1905
## - award 40 422.9276
## - catbidm2 34 432.1730
## - ITm2 46 441.1699
## - tm2 46 455.0209
## # weights: 72 (46 variable)
## initial value 377.922627
## iter 10 value 233.846879
## iter 20 value 164.163452
## iter 30 value 154.707814
## iter 40 value 154.401308
## iter 50 value 154.325017
## iter 60 value 154.314002
## iter 70 value 154.313084
## final value 154.313023
## converged
##
## Step: AIC=400.63
## statusm2 ~ ITm2 + km2 + tm2 + percentprivm2 + catpppm2 + sector +
## catbidm2 + award
##
## trying - ITm2
## # weights: 69 (44 variable)
## initial value 377.922627
## iter 10 value 223.356493
## iter 20 value 177.604971
## iter 30 value 175.102283
## iter 40 value 174.869174
## iter 50 value 174.819611
## iter 60 value 174.817757
## final value 174.817722
## converged
## trying - km2
## # weights: 69 (44 variable)
## initial value 377.922627
## iter 10 value 224.739495
## iter 20 value 169.683214
## iter 30 value 162.917068
## iter 40 value 162.703404
## iter 50 value 162.632319
## iter 60 value 162.629732
## final value 162.629702
## converged
## trying - tm2
## # weights: 69 (44 variable)
## initial value 377.922627
## iter 10 value 255.152474
## iter 20 value 188.591818
## iter 30 value 182.830257
## iter 40 value 182.556625
## iter 50 value 182.517300
## iter 60 value 182.515484
## final value 182.515444
## converged
## trying - percentprivm2
## # weights: 69 (44 variable)
## initial value 377.922627
## iter 10 value 213.199216
## iter 20 value 163.918615
## iter 30 value 155.840865
## iter 40 value 155.609882
## iter 50 value 155.556248
## iter 60 value 155.547857
## iter 70 value 155.547570
## iter 70 value 155.547569
## iter 70 value 155.547569
## final value 155.547569
## converged
## trying - catpppm2
## # weights: 63 (40 variable)
## initial value 377.922627
## iter 10 value 232.024555
## iter 20 value 173.021288
## iter 30 value 168.422665
## iter 40 value 168.280768
## iter 50 value 168.241832
## iter 60 value 168.240234
## final value 168.240210
## converged
## trying - sector
## # weights: 60 (38 variable)
## initial value 377.922627
## iter 10 value 243.547814
## iter 20 value 168.109952
## iter 30 value 161.034268
## iter 40 value 160.847974
## iter 50 value 160.802625
## iter 60 value 160.800076
## final value 160.799972
## converged
## trying - catbidm2
## # weights: 51 (32 variable)
## initial value 377.922627
## iter 10 value 237.432093
## iter 20 value 188.078599
## iter 30 value 183.539031
## iter 40 value 183.297383
## iter 50 value 183.269605
## iter 60 value 183.268975
## iter 60 value 183.268973
## iter 60 value 183.268973
## final value 183.268973
## converged
## trying - award
## # weights: 60 (38 variable)
## initial value 377.922627
## iter 10 value 244.672361
## iter 20 value 177.208493
## iter 30 value 171.835823
## iter 40 value 171.659474
## iter 50 value 171.630674
## iter 60 value 171.629715
## final value 171.629695
## converged
## trying + debtm2
## # weights: 75 (48 variable)
## initial value 377.922627
## iter 10 value 266.148730
## iter 20 value 172.158004
## iter 30 value 155.994029
## iter 40 value 154.422766
## iter 50 value 154.234559
## iter 60 value 154.215803
## iter 70 value 154.206328
## iter 80 value 154.201999
## final value 154.201869
## converged
## Df AIC
## - sector 38 397.5999
## - percentprivm2 44 399.0951
## <none> 46 400.6260
## + +debtm2 48 404.4037
## - km2 44 413.2594
## - catpppm2 40 416.4804
## - award 38 419.2594
## - catbidm2 32 430.5379
## - ITm2 44 437.6354
## - tm2 44 453.0309
## # weights: 60 (38 variable)
## initial value 377.922627
## iter 10 value 243.547814
## iter 20 value 168.109952
## iter 30 value 161.034268
## iter 40 value 160.847974
## iter 50 value 160.802625
## iter 60 value 160.800076
## final value 160.799972
## converged
##
## Step: AIC=397.6
## statusm2 ~ ITm2 + km2 + tm2 + percentprivm2 + catpppm2 + catbidm2 +
## award
##
## trying - ITm2
## # weights: 57 (36 variable)
## initial value 377.922627
## iter 10 value 221.775094
## iter 20 value 182.491274
## iter 30 value 181.578035
## iter 40 value 181.482173
## iter 50 value 181.478400
## final value 181.478153
## converged
## trying - km2
## # weights: 57 (36 variable)
## initial value 377.922627
## iter 10 value 224.757696
## iter 20 value 172.662936
## iter 30 value 170.184042
## iter 40 value 170.045424
## iter 50 value 170.017636
## final value 170.017331
## converged
## trying - tm2
## # weights: 57 (36 variable)
## initial value 377.922627
## iter 10 value 256.820767
## iter 20 value 198.531288
## iter 30 value 197.420177
## iter 40 value 197.246957
## iter 50 value 197.227381
## iter 60 value 197.227086
## iter 60 value 197.227084
## iter 60 value 197.227084
## final value 197.227084
## converged
## trying - percentprivm2
## # weights: 57 (36 variable)
## initial value 377.922627
## iter 10 value 227.468521
## iter 20 value 166.129864
## iter 30 value 161.985316
## iter 40 value 161.814732
## iter 50 value 161.789872
## iter 60 value 161.787493
## final value 161.787467
## converged
## trying - catpppm2
## # weights: 51 (32 variable)
## initial value 377.922627
## iter 10 value 231.711999
## iter 20 value 173.605898
## iter 30 value 171.789749
## iter 40 value 171.656003
## iter 50 value 171.652786
## final value 171.652543
## converged
## trying - catbidm2
## # weights: 39 (24 variable)
## initial value 377.922627
## iter 10 value 244.147803
## iter 20 value 195.249904
## iter 30 value 194.142548
## iter 40 value 194.034094
## iter 50 value 194.032352
## iter 50 value 194.032352
## final value 194.032352
## converged
## trying - award
## # weights: 48 (30 variable)
## initial value 377.922627
## iter 10 value 241.308359
## iter 20 value 183.080527
## iter 30 value 181.051475
## iter 40 value 180.948079
## iter 50 value 180.945429
## final value 180.945389
## converged
## trying + sector
## # weights: 72 (46 variable)
## initial value 377.922627
## iter 10 value 233.846879
## iter 20 value 164.163452
## iter 30 value 154.707814
## iter 40 value 154.401308
## iter 50 value 154.325017
## iter 60 value 154.314002
## iter 70 value 154.313084
## final value 154.313023
## converged
## trying + debtm2
## # weights: 63 (40 variable)
## initial value 377.922627
## iter 10 value 266.158359
## iter 20 value 175.668743
## iter 30 value 161.275274
## iter 40 value 160.749367
## iter 50 value 160.694585
## iter 60 value 160.686216
## iter 70 value 160.684300
## final value 160.684293
## converged
## Df AIC
## - percentprivm2 36 395.5749
## <none> 38 397.5999
## + +sector 46 400.6260
## + +debtm2 40 401.3686
## - catpppm2 32 407.3051
## - km2 36 412.0347
## - award 30 421.8908
## - ITm2 36 434.9563
## - catbidm2 24 436.0647
## - tm2 36 466.4542
## # weights: 57 (36 variable)
## initial value 377.922627
## iter 10 value 227.468521
## iter 20 value 166.129864
## iter 30 value 161.985316
## iter 40 value 161.814732
## iter 50 value 161.789872
## iter 60 value 161.787493
## final value 161.787467
## converged
##
## Step: AIC=395.57
## statusm2 ~ ITm2 + km2 + tm2 + catpppm2 + catbidm2 + award
##
## trying - ITm2
## # weights: 54 (34 variable)
## initial value 377.922627
## iter 10 value 214.352497
## iter 20 value 183.224284
## iter 30 value 182.574113
## iter 40 value 182.453938
## iter 50 value 182.451143
## final value 182.451118
## converged
## trying - km2
## # weights: 54 (34 variable)
## initial value 377.922627
## iter 10 value 211.725910
## iter 20 value 173.563821
## iter 30 value 170.981479
## iter 40 value 170.846848
## iter 50 value 170.836472
## final value 170.836390
## converged
## trying - tm2
## # weights: 54 (34 variable)
## initial value 377.922627
## iter 10 value 242.402374
## iter 20 value 202.712181
## iter 30 value 200.574047
## iter 40 value 200.386422
## iter 50 value 200.372228
## final value 200.372133
## converged
## trying - catpppm2
## # weights: 48 (30 variable)
## initial value 377.922627
## iter 10 value 239.612200
## iter 20 value 173.103508
## iter 30 value 172.386505
## iter 40 value 172.300856
## iter 50 value 172.298286
## final value 172.298277
## converged
## trying - catbidm2
## # weights: 36 (22 variable)
## initial value 377.922627
## iter 10 value 232.353776
## iter 20 value 197.498267
## iter 30 value 196.480177
## iter 40 value 196.348113
## final value 196.345705
## converged
## trying - award
## # weights: 45 (28 variable)
## initial value 377.922627
## iter 10 value 242.357597
## iter 20 value 185.679286
## iter 30 value 184.146352
## iter 40 value 184.062300
## iter 50 value 184.059929
## final value 184.059918
## converged
## trying + percentprivm2
## # weights: 60 (38 variable)
## initial value 377.922627
## iter 10 value 243.547814
## iter 20 value 168.109952
## iter 30 value 161.034268
## iter 40 value 160.847974
## iter 50 value 160.802625
## iter 60 value 160.800076
## final value 160.799972
## converged
## trying + sector
## # weights: 69 (44 variable)
## initial value 377.922627
## iter 10 value 213.199216
## iter 20 value 163.918615
## iter 30 value 155.840865
## iter 40 value 155.609882
## iter 50 value 155.556248
## iter 60 value 155.547857
## iter 70 value 155.547570
## iter 70 value 155.547569
## iter 70 value 155.547569
## final value 155.547569
## converged
## trying + debtm2
## # weights: 60 (38 variable)
## initial value 377.922627
## iter 10 value 233.210067
## iter 20 value 165.965814
## iter 30 value 161.910680
## iter 40 value 161.704706
## iter 50 value 161.661635
## iter 60 value 161.660435
## iter 70 value 161.660066
## final value 161.660057
## converged
## Df AIC
## <none> 36 395.5749
## + +percentprivm2 38 397.5999
## + +sector 44 399.0951
## + +debtm2 38 399.3201
## - catpppm2 30 404.5966
## - km2 34 409.6728
## - award 28 424.1198
## - ITm2 34 432.9022
## - catbidm2 22 436.6914
## - tm2 34 468.7443
Método sugeriu a retirada de \(debtm2\) e \(percentprivm2\)
logm2 = multinom(statusm2 ~ ITm2 + km2 + tm2 + catpppm2 + sector + catbidm2 + award, data=m2)
## # weights: 69 (44 variable)
## initial value 377.922627
## iter 10 value 213.199216
## iter 20 value 163.918615
## iter 30 value 155.840865
## iter 40 value 155.609882
## iter 50 value 155.556248
## iter 60 value 155.547857
## iter 70 value 155.547570
## iter 70 value 155.547569
## iter 70 value 155.547569
## final value 155.547569
## converged
summary(logm2)
## Call:
## multinom(formula = statusm2 ~ ITm2 + km2 + tm2 + catpppm2 + sector +
## catbidm2 + award, data = m2)
##
## Coefficients:
## (Intercept) ITm2 km2 tm2 catpppm22 catpppm23
## Cancelled -4.224918 0.03751531 -0.02842901 0.3121012 92.44691 -0.1908299
## Distressed -3.788915 0.03702137 -0.02732782 0.3039021 90.72949 -1.6160153
## catpppm24 sectorICT sectorMSW sectorTrp sectorWater catbidm22
## Cancelled -3.501328 2.940868 33.012360 -0.7770645 1.1125687 13.70522
## Distressed -5.821229 2.878987 -2.247188 -1.4861147 0.1654327 -12.58906
## catbidm23 catbidm24 catbidm25 catbidm26 catbidm27 catbidm28
## Cancelled 27.53328 6.339004 1.329585 4.95275 3.634015 6.703867
## Distressed -13.11049 7.967968 -24.293123 -11.12637 -16.693422 8.473917
## awardCompetitive bidding awardCompetitive negotiation
## Cancelled -3.172628 -9.874664
## Distressed -4.455466 -42.765788
## awardDirect negotiation awardLicense scheme
## Cancelled 1.433699 48.43313
## Distressed 2.105192 48.67427
##
## Std. Errors:
## (Intercept) ITm2 km2 tm2 catpppm22 catpppm23
## Cancelled 0.5539088 0.006547832 0.006535997 0.05496807 0.2893222 0.6486896
## Distressed 0.5683248 0.006613142 0.006623136 0.05598903 0.2893222 0.6914530
## catpppm24 sectorICT sectorMSW sectorTrp sectorWater catbidm22
## Cancelled 0.7434367 0.5056660 4.675874e-15 0.6018174 0.7333978 3.395510e-09
## Distressed 1.0611400 0.5238442 1.173746e-19 0.6396489 0.8015557 4.266684e-13
## catbidm23 catbidm24 catbidm25 catbidm26 catbidm27
## Cancelled 7.223096e-14 0.4530643 1.865790e-01 1.468225e+00 1.370490e+00
## Distressed 1.353251e-18 0.4508169 8.446196e-13 2.556969e-07 7.758955e-09
## catbidm28 awardCompetitive bidding awardCompetitive negotiation
## Cancelled 0.7038888 0.6385195 9.889307e-07
## Distressed 0.7193883 0.7498548 1.269093e-16
## awardDirect negotiation awardLicense scheme
## Cancelled 0.5297004 0.3901919
## Distressed 0.5010326 0.3901919
##
## Residual Deviance: 311.0951
## AIC: 399.0951
lr_test <- lrtest(logm2)
## # weights: 6 (2 variable)
## initial value 377.922627
## final value 348.419556
## converged
print(lr_test)
## Likelihood ratio test
##
## Model 1: statusm2 ~ ITm2 + km2 + tm2 + catpppm2 + sector + catbidm2 +
## award
## Model 2: statusm2 ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 44 -155.55
## 2 2 -348.42 -42 385.74 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#coef_test(logm1)
# Calcular a log-verossimilhança do modelo completo (logm1)
log_likelihood_model <- logLik(logm2)
# Calcular a log-verossimilhança do modelo nulo (intercept apenas)
# Você pode criar um modelo nulo usando a função multinom com apenas ~ 1
null_model <- multinom(statusm2 ~ 1, data = m2)
## # weights: 6 (2 variable)
## initial value 377.922627
## final value 348.419556
## converged
log_likelihood_null <- logLik(null_model)
# Calcular o R² de McFadden
R2_McFadden <- 1 - (log_likelihood_model / log_likelihood_null)
# Exibir o resultado
print(R2_McFadden)
## 'log Lik.' 0.5535625 (df=44)
O modelo possui um ajuste melhor que o modelo com dados completos.
z = summary(logm2)$coefficients/summary(logm2)$standard.errors
z
## (Intercept) ITm2 km2 tm2 catpppm22 catpppm23
## Cancelled -7.627461 5.729424 -4.349605 5.677862 319.5293 -0.2941775
## Distressed -6.666812 5.598151 -4.126114 5.427886 313.5932 -2.3371298
## catpppm24 sectorICT sectorMSW sectorTrp sectorWater
## Cancelled -4.709652 5.815830 7.060148e+15 -1.291196 1.5170058
## Distressed -5.485826 5.495884 -1.914544e+19 -2.323329 0.2063895
## catbidm22 catbidm23 catbidm24 catbidm25 catbidm26
## Cancelled 4.036277e+09 3.811840e+14 13.99140 7.126122e+00 3.373292e+00
## Distressed -2.950549e+13 -9.688142e+18 17.67451 -2.876221e+13 -4.351390e+07
## catbidm27 catbidm28 awardCompetitive bidding
## Cancelled 2.651618e+00 9.524044 -4.968726
## Distressed -2.151504e+09 11.779336 -5.941772
## awardCompetitive negotiation awardDirect negotiation
## Cancelled -9.985193e+06 2.706621
## Distressed -3.369791e+17 4.201706
## awardLicense scheme
## Cancelled 124.1265
## Distressed 124.7444
# 2-tailed z test
p <- (1 - pnorm(abs(z), 0, 1)) * 2
p
## (Intercept) ITm2 km2 tm2 catpppm22
## Cancelled 2.398082e-14 1.007722e-08 1.363828e-05 1.363883e-08 0
## Distressed 2.614198e-11 2.166496e-08 3.689445e-05 5.702557e-08 0
## catpppm23 catpppm24 sectorICT sectorMSW sectorTrp sectorWater
## Cancelled 0.76862226 2.481396e-06 6.03337e-09 0 0.19663558 0.1292652
## Distressed 0.01943243 4.115421e-08 3.88757e-08 0 0.02016149 0.8364866
## catbidm22 catbidm23 catbidm24 catbidm25 catbidm26 catbidm27
## Cancelled 0 0 0 1.032285e-12 0.0007427518 0.00801072
## Distressed 0 0 0 0.000000e+00 0.0000000000 0.00000000
## catbidm28 awardCompetitive bidding awardCompetitive negotiation
## Cancelled 0 6.739435e-07 0
## Distressed 0 2.819575e-09 0
## awardDirect negotiation awardLicense scheme
## Cancelled 6.797172e-03 0
## Distressed 2.649108e-05 0
##Razão de chances?
## extract the coefficients from the model and exponentiate
exp(coef(logm2))
## (Intercept) ITm2 km2 tm2 catpppm22 catpppm23
## Cancelled 0.01462653 1.038228 0.9719713 1.366293 1.409885e+40 0.8262731
## Distressed 0.02262014 1.037715 0.9730422 1.355136 2.531147e+39 0.1986888
## catpppm24 sectorICT sectorMSW sectorTrp sectorWater catbidm22
## Cancelled 0.030157295 18.93227 2.17313e+14 0.4597536 3.042163 8.955719e+05
## Distressed 0.002963959 17.79624 1.05696e-01 0.2262500 1.179904 3.409103e-06
## catbidm23 catbidm24 catbidm25 catbidm26 catbidm27
## Cancelled 9.068871e+11 566.232 3.779475e+00 1.415637e+02 3.786452e+01
## Distressed 2.023884e-06 2886.985 2.815989e-11 1.471906e-05 5.625215e-08
## catbidm28 awardCompetitive bidding awardCompetitive negotiation
## Cancelled 815.5538 0.04189336 5.146216e-05
## Distressed 4788.2328 0.01161490 2.673341e-19
## awardDirect negotiation awardLicense scheme
## Cancelled 4.194183 1.082039e+21
## Distressed 8.208676 1.377104e+21
\(%A razão de chances entre o contrato ser cancelado e concluido considerando a variável Investimento total, ou seja, existe 0,99 chances de o contrato ser cancelado ao invés de concluido.\)
# Dados para a categoria Cancelled
cancelled_results <- data.frame(
Variable = colnames(exp(coef(logm2))),
Odds_Ratio = exp(coef(logm2)["Cancelled", ]),
P_Value = p["Cancelled", ]
)
# Exibir a tabela
print(cancelled_results)#, n=23)
## Variable Odds_Ratio
## (Intercept) (Intercept) 1.462653e-02
## ITm2 ITm2 1.038228e+00
## km2 km2 9.719713e-01
## tm2 tm2 1.366293e+00
## catpppm22 catpppm22 1.409885e+40
## catpppm23 catpppm23 8.262731e-01
## catpppm24 catpppm24 3.015730e-02
## sectorICT sectorICT 1.893227e+01
## sectorMSW sectorMSW 2.173130e+14
## sectorTrp sectorTrp 4.597536e-01
## sectorWater sectorWater 3.042163e+00
## catbidm22 catbidm22 8.955719e+05
## catbidm23 catbidm23 9.068871e+11
## catbidm24 catbidm24 5.662320e+02
## catbidm25 catbidm25 3.779475e+00
## catbidm26 catbidm26 1.415637e+02
## catbidm27 catbidm27 3.786452e+01
## catbidm28 catbidm28 8.155538e+02
## awardCompetitive bidding awardCompetitive bidding 4.189336e-02
## awardCompetitive negotiation awardCompetitive negotiation 5.146216e-05
## awardDirect negotiation awardDirect negotiation 4.194183e+00
## awardLicense scheme awardLicense scheme 1.082039e+21
## P_Value
## (Intercept) 2.398082e-14
## ITm2 1.007722e-08
## km2 1.363828e-05
## tm2 1.363883e-08
## catpppm22 0.000000e+00
## catpppm23 7.686223e-01
## catpppm24 2.481396e-06
## sectorICT 6.033370e-09
## sectorMSW 0.000000e+00
## sectorTrp 1.966356e-01
## sectorWater 1.292652e-01
## catbidm22 0.000000e+00
## catbidm23 0.000000e+00
## catbidm24 0.000000e+00
## catbidm25 1.032285e-12
## catbidm26 7.427518e-04
## catbidm27 8.010720e-03
## catbidm28 0.000000e+00
## awardCompetitive bidding 6.739435e-07
## awardCompetitive negotiation 0.000000e+00
## awardDirect negotiation 6.797172e-03
## awardLicense scheme 0.000000e+00
As variáveis \(catppm23\) Greenfield project, setor de transportes \(sectorTrp\) e setor de água e esgoto \(sectorWater\) são estatísticamente insgnificantes nos efeitos \(P(\frac{cancelled}{concluded})\).
A variável Investimento total \(ITm1\) possui efeito similar aos dados completos, ou seja, um efeito muito pequeno sobre a probabilidade de cancelamento.
\(tm1\) tempo de contrato possui efeito bem similar ao obervado nos dados completos, aumentando cerca de 31% a chance de cancelamento do contrato quando aumentando uma unidade do período de contrato.
Assim como nos dados completos a variável \(catpppm4\) (Management and Lease Contracts) possui mais influência para o evento ser concluido do que cancelado.
As variáveis setorias neste teste de robustez seguem a mesma ordem de influencia sobre o status dos contratos, contudo as variáveis \(sectorTrp\) e \(sectorWater\) não possuem significância estatística a 5%, mas ainda mantendo ordem de chance semelhante. A variável \(sectorICT\) continuou tendo maior chance de ser cancelada, porém a ordem da chance teve sua escala aumentada de 4.62 para 18.93.
Todos os efeitos nas variáveis categóricas bid mantiveram maior chance de cancelamento, modificando apenas a escala, sendo todas elas significante estatísticamente.
Nas variáveis categorica de adjudicação do contrato, apenas \(awardCompetitivenegociation\) teve mudança de posição, onde a mesma possui uma grande chance de ser conluida versus o cancelamento, em contraste com o resultado dos dados completos que possuiam 2 vezes mais chances de ser cancelada.
Distressed
# Dados para a categoria Distressed
distressed_results <- data.frame(
Variable = colnames(exp(coef(logm2))),
Odds_Ratio = exp(coef(logm2)["Distressed", ]),
P_Value = p["Distressed", ]
)
# Exibir a tabela
distressed_results
Todas as variáveis presentes na categoria Distressed possuem a mesma direção de chance quando comparadas aos dados completos, mudando apenas a ordem de grandeza da razão de chances. Nessa estimação apenas a variável \(sectorWater\) não possui significância estatística.
# Obter as probabilidades estimadas
pp <- fitted(logm2)
# Definir um limite de probabilidade (por exemplo, 0,5)
limite_probabilidade <- 0.5
# Classificar as observações com base nas probabilidades
classificacao_estimada <- apply(pp, 1, function(x) {
ifelse(max(x) >= limite_probabilidade, which.max(x), NA)
})
# Adicionar a classificação estimada ao conjunto de dados original
m2$classificacao_estimada <- classificacao_estimada
# Comparar com a classificação real
tabela_comparacao <- table(statusm2, m2$classificacao_estimada)
print(tabela_comparacao)
##
## statusm2 1 2 3
## Concluded 102 3 0
## Cancelled 5 156 10
## Distressed 2 36 21
Coluna 1: A categoria real da variável dependente.
Coluna 2: A categoria estimada pelo modelo.
Coluna 3: O número de observações que foram classificadas corretamente pelo modelo.
# Crie a tabela de comparação
tabela_comparacao <- table(statusm2, m2$classificacao_estimada)
# Calcule o número total de observações em cada categoria real
total_real <- rowSums(tabela_comparacao)
# Calcule o número total de observações estimadas em cada categoria
total_estimado <- colSums(tabela_comparacao)
# Calcule a taxa de precisão (percentual de acerto) para cada categoria
taxa_precisao <- diag(tabela_comparacao) / total_real * 100
# Calcule a taxa de precisão global (total)
taxa_precisao_global <- sum(diag(tabela_comparacao)) / sum(total_real) * 100
# Adicione o total estimado à tabela de comparação
tabela_comparacao <- rbind(tabela_comparacao, Total_Estimado = total_estimado)
# Adicione a taxa de precisão à tabela de comparação
tabela_comparacao <- cbind(tabela_comparacao, Taxa_Precisao = c(taxa_precisao, Taxa_Precisao_Global = taxa_precisao_global))
# Renomeie a última coluna
colnames(tabela_comparacao)[ncol(tabela_comparacao)] <- "Taxa de Precisão (%)"
# Imprima a tabela
print(tabela_comparacao)
## 1 2 3 Taxa de Precisão (%)
## Concluded 102 3 0 97.14286
## Cancelled 5 156 10 91.22807
## Distressed 2 36 21 35.59322
## Total_Estimado 109 195 31 83.28358
A matriz de confusão apresentada reflete o desempenho de um modelo de classificação multiclasse. Conclusões por Classe:
Concluded (1): O modelo acertou 102 previsões desta classe, com uma taxa de precisão de 97.14%. Houve 3 casos que foram erroneamente classificados como ‘Cancelled’ e nenhum como ‘Distressed’.
Cancelled (2): Foram corretamente previstas 156 observações desta classe, com uma taxa de precisão de 91.23%. No entanto, o modelo cometeu erros ao classificar 5 casos como ‘Concluded’ e 10 como ‘Distressed’.
Distressed (3): O modelo acertou 21 previsões para esta classe, mas com uma taxa de precisão relativamente baixa de 35.59%. Houve 2 observações erroneamente classificadas como ‘Concluded’ e 36 como ‘Cancelled’.
Total Estimado: O total de estimativas mostra a soma das previsões do modelo para cada classe. Por exemplo, 109 observações foram previstas como ‘Concluded’, 195 como ‘Cancelled’ e 31 como ‘Distressed’.
A curva ROC (Receiver Operating Characteristic) é usada para avaliar a capacidade preditiva de um modelo em relação à classificação binária. No entanto, ela pode ser adaptada para modelos de classificação multinomial usando várias estratégias, como a análise one-vs-rest ou one-vs-one.
A estratégia comum para criar uma curva ROC em um modelo logit multinomial:
O problema foi transformado em vários problemas de classificação binária. Isso foi feito treinando o modelo para cada classe em relação a todas as outras classes combinadas. Utilizando a métrica de interesse (como a taxa de verdadeiros positivos e a taxa de falsos positivos) para avaliar cada modelo de classificação binária resultante. A curva ROC com base nessas métricas para cada classe:
# Transformar o problema em vários problemas de classificação binária
# Supondo que "logm2" é o objeto que representa o modelo logit multinomial
# e "m2" é o conjunto de dados correspondente
# Adicione um pacote para trabalhar com a lógica de classificação
# Crie uma função para ajustar modelos one-vs-rest
fit_one_vs_rest <- function(class_label) {
binary_data <- m2
binary_data$target <- ifelse(statusm2 == class_label, 1, 0)
binary_model <- glm(target ~ ITm2 + km2 + tm2 + percentprivm2 + catpppm2 + sector + debtm2 + catbidm2, data = binary_data, family = binomial)
return(binary_model)
}
# Ajuste um modelo para cada classe
class_labels <- unique(statusm2)
binary_models <- lapply(class_labels, fit_one_vs_rest)
## Warning: glm.fit: probabilidades ajustadas numericamente 0 ou 1 ocorreu
# Calcular as métricas para cada modelo binário
roc_list <- lapply(binary_models, function(model) {
roc_obj <- roc(statusm2, predict(model, type = "response"))
return(roc_obj)
})
## Warning in roc.default(statusm2, predict(model, type = "response")): 'response'
## has more than two levels. Consider setting 'levels' explicitly or using
## 'multiclass.roc' instead
## Setting levels: control = Concluded, case = Cancelled
## Setting direction: controls < cases
## Warning in roc.default(statusm2, predict(model, type = "response")): 'response'
## has more than two levels. Consider setting 'levels' explicitly or using
## 'multiclass.roc' instead
## Setting levels: control = Concluded, case = Cancelled
## Setting direction: controls > cases
## Warning in roc.default(statusm2, predict(model, type = "response")): 'response'
## has more than two levels. Consider setting 'levels' explicitly or using
## 'multiclass.roc' instead
## Setting levels: control = Concluded, case = Cancelled
## Setting direction: controls < cases
# Plotar a curva ROC
plot(roc_list[[1]], col = 1, main = "Curvas ROC para o modelo logit multinomial")
for (i in 2:length(roc_list)) {
lines(roc_list[[i]], col = i)
}
legend("bottomright", legend = class_labels, col = 1:length(roc_list), lty = 1)
A categoria Cancelled possui melhor medida de classificação considerando este exercício de robustez do que com os dados completos. O que também se reflete nas probabilidades estimadas abaixo.
head(pp <- fitted(logm2))
## Concluded Cancelled Distressed
## 1 0.026532102 0.6441932 0.3292747
## 2 0.025751525 0.6442726 0.3299759
## 3 0.004397499 0.6660511 0.3295514
## 4 0.003187578 0.8875625 0.1092499
## 5 0.005507103 0.8889833 0.1055096
## 6 0.001471664 0.8634406 0.1350877
De maneira geral o teste de robustez valida os resultados do modelo principal com os dados de todas as categorias de contrato.