Modelos de sobrevivência. A Análise de Sobrevivência compreende os estudos em que o interesse principal é avaliar o tempo até a ocorrência de um evento pré-determinado. Esses tempos, chamados de tempos de falha, podem, então, ser explicados por outras variáveis a partir de modelos de regressão paramétricos ou semi-paramétricos. Uma característica fundamental desse tipo de estudo é a presença de censura, definida como a observação parcial do tempo de falha.

# Read the comma-separated data file; the first row holds the column names.
# NOTE(review): the absolute, user-specific path only resolves on the
# original author's machine.
dados <- read.table(
  file = "C:/Users/Gustavo/Downloads/sobrevivencia2.csv",
  header = TRUE,
  sep = ","
)
# Preview the first rows to check how the columns were parsed.
head(dados)
##        Bike_id Publish_period     End_Date Departure_Date
## 1  XL3-1095127             10 1.301357e+18   1.305590e+18
## 3  XL3-1123648             28 1.302307e+18   1.307923e+18
## 4  XL3-1139930             17 1.302826e+18   1.308096e+18
## 6  XL3-1277412             17 1.301616e+18   1.310170e+18
## 8  XL3-1513147              4 1.302221e+18   1.312848e+18
## 10 XL3-1146406             48 1.307318e+18   1.317427e+18
##    Distance_travelled  Color Country_Sold N_Photos  Price N_Inquires
## 1              123525 Silver      Vietnam       27 110000         34
## 3              120000 Silver      Vietnam       28 110000         53
## 4              149374 Silver      Vietnam       27 130000         34
## 6              112167  Black      Vietnam       26 150000         58
## 8              126000  Black      Vietnam       29 150000         20
## 10             154117 Silver         Laos       28 150000         73
##    is_sold
## 1        0
## 3        1
## 4        0
## 6        0
## 8        0
## 10       0
# Convert the two categorical columns to factors in a single pass.
colunas_categoricas <- c("Color", "Country_Sold")
dados[colunas_categoricas] <- lapply(dados[colunas_categoricas], as.factor)
# Put the columns of `dados` on the search path: later calls such as
# ad.test(Price) and summary(N_Photos) refer to the columns by bare name.
attach(dados)

Análise descritiva dos dados

Algumas estatísticas

# Descriptive statistics for the numeric columns only: positions 1, 6, 7 and
# 11 (Bike_id, Color, Country_Sold and is_sold in the head() output) are
# left out.
colunas_excluidas <- c(1, 6, 7, 11)
dad <- dados[, -colunas_excluidas]
basicStats(dad)
##             Publish_period      End_Date Departure_Date Distance_travelled
## nobs           2660.000000  2.660000e+03   2.660000e+03       2.660000e+03
## NAs               0.000000  0.000000e+00   0.000000e+00       0.000000e+00
## Minimum           0.000000  1.301357e+18   1.305590e+18       6.560000e+03
## Maximum          66.000000  1.402358e+18   1.409702e+18       1.813260e+05
## 1. Quartile       5.000000  1.364947e+18   1.369267e+18       7.151825e+04
## 3. Quartile      17.000000  1.390262e+18   1.395792e+18       1.080158e+05
## Mean             11.968421  1.375150e+18   1.379954e+18       9.012179e+04
## Median            9.000000  1.381277e+18   1.384819e+18       9.051150e+04
## Sum           31836.000000  3.657898e+21   3.670677e+21       2.397240e+08
## SE Mean           0.194594  3.872566e+14   3.876686e+14       5.281289e+02
## LCL Mean         11.586850  1.374390e+18   1.379193e+18       8.908621e+04
## UCL Mean         12.349992  1.375909e+18   1.380714e+18       9.115738e+04
## Variance        100.725591  3.989140e+32   3.997634e+32       7.419276e+08
## Stdev            10.036214  1.997283e+16   1.999408e+16       2.723835e+04
## Skewness          1.482170 -1.173550e+00  -1.024769e+00       2.545200e-02
## Kurtosis          2.995359  1.034617e+00   6.429600e-01      -1.159300e-01
##                 N_Photos        Price   N_Inquires
## nobs         2660.000000 2.660000e+03  2660.000000
## NAs             0.000000 0.000000e+00     0.000000
## Minimum         4.000000 6.000000e+04     1.000000
## Maximum        45.000000 2.100000e+05   306.000000
## 1. Quartile    28.000000 9.000000e+04     4.000000
## 3. Quartile    30.000000 1.000000e+05    15.000000
## Mean           29.622180 9.292707e+04    12.218045
## Median         30.000000 9.000000e+04     8.000000
## Sum         78795.000000 2.471860e+08 32500.000000
## SE Mean         0.040813 2.585055e+02     0.311374
## LCL Mean       29.542152 9.242018e+04    11.607485
## UCL Mean       29.702209 9.343396e+04    12.828605
## Variance        4.430723 1.777547e+08   257.897531
## Stdev           2.104928 1.333247e+04    16.059188
## Skewness       -3.158401 1.638957e+00     6.851322
## Kurtosis       50.685277 6.119893e+00    87.903827
# Frequency tables for the categorical columns (positions 6, 7 and 11:
# Color, Country_Sold and the sale indicator is_sold).
dad <- dados[, c(6, 7, 11)]
# is_sold is stored as 0/1; treat it as a factor so summary() counts levels.
dad[["is_sold"]] <- as.factor(dad[["is_sold"]])
summary(dad)
##      Color         Country_Sold  is_sold 
##  Silver :649   Laos      :1472   0:2321  
##  Blue   :608   Philippine: 404   1: 339  
##  Red    :592   Vietnam   : 275           
##  Pearl  :209   Cambodia  : 245           
##  Black  :196   Malaysia  :  57           
##  Pink   :107   Chile     :  41           
##  (Other):299   (Other)   : 166
# Four boxplots on a 2 x 2 grid, one panel per numeric covariate.
par(mfrow = c(2, 2))
titulos_boxplot <- c(
  Price = "BoxPlot da variável Preço",
  N_Inquires = "BoxPlot da variável Número de conversas",
  N_Photos = "BoxPlot da variável N de Fotos",
  Distance_travelled = "BoxPlot da variável Distância percorrida"
)
for (coluna in names(titulos_boxplot)) {
  # las = 2 draws the axis labels perpendicular to the axis.
  boxplot(dados[[coluna]], las = 2, main = titulos_boxplot[[coluna]])
}

Teste de normalidade

# Anderson-Darling normality test on the attached Price column.
ad.test(Price)
## 
##  Anderson-Darling normality test
## 
## data:  Price
## A = 126.19, p-value < 2.2e-16
# Anderson-Darling normality test on the attached Publish_period column.
ad.test(Publish_period)
## 
##  Anderson-Darling normality test
## 
## data:  Publish_period
## A = 72.349, p-value < 2.2e-16
# Anderson-Darling normality test on the attached N_Inquires column.
ad.test(N_Inquires)
## 
##  Anderson-Darling normality test
## 
## data:  N_Inquires
## A = 261.77, p-value < 2.2e-16
# Anderson-Darling normality test on the attached N_Photos column.
ad.test(N_Photos)
## 
##  Anderson-Darling normality test
## 
## data:  N_Photos
## A = 115.53, p-value < 2.2e-16
# Anderson-Darling normality test on the attached Distance_travelled column.
ad.test(Distance_travelled)
## 
##  Anderson-Darling normality test
## 
## data:  Distance_travelled
## A = 0.48731, p-value = 0.224

Gráficos

# Histograms of the numeric covariates; each attached column is wrapped with
# as.timeSeries() before being handed to histPlot().
# The 2 x 2 layout holds four panels, so the fifth histogram
# (Distance_travelled) starts a new page.
par(mfrow=c(2,2))
histPlot(as.timeSeries(Price))
histPlot(as.timeSeries(Publish_period))
histPlot(as.timeSeries(N_Inquires))
histPlot(as.timeSeries(N_Photos))

histPlot(as.timeSeries(Distance_travelled))

Correlação

# Correlation among the numeric covariates. Positions 1, 3, 4, 6, 7 and 11
# (identifier, the two date columns, the two factors and is_sold) are removed.
dad <- dados[, -c(1, 3, 4, 6, 7, 11)]
matriz_cor <- cor(dad)
# Correlogram with variables ordered by hierarchical clustering.
corrplot(matriz_cor, order = "hclust", tl.col = "black", tl.cex = 0.75)

# Same matrix rounded to two decimals for display.
R <- round(matriz_cor, 2)
R
##                    Publish_period Distance_travelled N_Photos Price
## Publish_period               1.00               0.06     0.08  0.16
## Distance_travelled           0.06               1.00    -0.10 -0.12
## N_Photos                     0.08              -0.10     1.00  0.03
## Price                        0.16              -0.12     0.03  1.00
## N_Inquires                   0.26               0.10    -0.12  0.10
##                    N_Inquires
## Publish_period           0.26
## Distance_travelled       0.10
## N_Photos                -0.12
## Price                    0.10
## N_Inquires               1.00

Métodos não paramétricos

Kaplan-Meier e LogRank

Curva de sobrevivência para os dados Completos

# Kaplan-Meier estimate of the overall survival function (no grouping).
# NOTE(review): `tempos` and `cens` are not created in the visible part of
# this script; presumably they come from a setup step that is not shown.
ekm1 <- survfit(Surv(tempos, cens) ~ 1, data = dados)
ekm1
## Call: survfit(formula = Surv(tempos, cens) ~ 1, data = dados)
## 
##       n  events  median 0.95LCL 0.95UCL 
##    2660     339      NA      NA      NA
# Kaplan-Meier table: one row per event time with the number at risk, the
# number of events, the survival estimate and its 95% confidence interval.
summary(ekm1)
## Call: survfit(formula = Surv(tempos, cens) ~ 1, data = dados)
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   0.1   2660      10    0.996 0.00119        0.994        0.999
##   1.0   2548      15    0.990 0.00192        0.987        0.994
##   2.0   2430      20    0.982 0.00263        0.977        0.987
##   3.0   2284      36    0.967 0.00364        0.960        0.974
##   4.0   2145      20    0.958 0.00413        0.950        0.966
##   5.0   2004      22    0.947 0.00465        0.938        0.956
##   6.0   1877      40    0.927 0.00554        0.916        0.938
##   7.0   1728      56    0.897 0.00666        0.884        0.910
##   8.0   1561      26    0.882 0.00716        0.868        0.896
##   9.0   1445      16    0.872 0.00749        0.858        0.887
##  10.0   1329      13    0.864 0.00778        0.849        0.879
##  11.0   1209       8    0.858 0.00799        0.843        0.874
##  12.0   1110      11    0.850 0.00831        0.833        0.866
##  13.0   1019       6    0.845 0.00851        0.828        0.861
##  14.0    932      13    0.833 0.00899        0.815        0.851
##  15.0    832       2    0.831 0.00908        0.813        0.849
##  16.0    752       4    0.826 0.00930        0.808        0.845
##  17.0    691       2    0.824 0.00942        0.806        0.843
##  18.0    633       5    0.817 0.00979        0.798        0.837
##  20.0    505       1    0.816 0.00990        0.797        0.835
##  21.0    461       5    0.807 0.01056        0.787        0.828
##  22.0    401       2    0.803 0.01088        0.782        0.825
##  24.0    324       1    0.800 0.01113        0.779        0.823
##  25.0    284       2    0.795 0.01174        0.772        0.818
##  28.0    210       1    0.791 0.01228        0.767        0.815
##  29.0    186       1    0.787 0.01293        0.762        0.813
##  38.0     65       1    0.775 0.01750        0.741        0.810
# Overall Kaplan-Meier curve.
plot(ekm1, main = "Curva de Sobrevivencia", xlab = "Dias", ylab = "S(t)")

Curvas de sobrevivência para as variáveis independentes

Cor

# Kaplan-Meier curves stratified by the Color factor.
KMcor <- survfit(formula = Surv(tempos, cens) ~ Color, data = dados)
print(KMcor)
## Call: survfit(formula = Surv(tempos, cens) ~ Color, data = dados)
## 
##                n events median 0.95LCL 0.95UCL
## Color=Beige    2      0     NA      NA      NA
## Color=Black  196     20     NA      NA      NA
## Color=Blue   608     78     NA      NA      NA
## Color=Brown    1      0     NA      NA      NA
## Color=Gray    73     11     NA      NA      NA
## Color=Green   49      5     NA      NA      NA
## Color=Maroon  26      9     16      10      NA
## Color=Pearl  209     29     38      38      NA
## Color=Pink   107      4     NA      NA      NA
## Color=Purple   1      0     NA      NA      NA
## Color=Red    592     73     NA      NA      NA
## Color=Silver 649     93     NA      NA      NA
## Color=White  106     15     NA      NA      NA
## Color=Yellow  41      2     NA      NA      NA
# One line type / colour index per colour stratum (14 strata in the fit);
# confidence bands are suppressed to keep the curves readable.
estilos <- seq_len(14)
plot(KMcor, conf.int = FALSE, lty = estilos, col = estilos,
     xlab = "Dias", ylab = "S(t)",
     main = "Curva de Sobrevivência pelas cores das motos")

Preço

# Splitting the price into two groups around the median.
summary(Price)# median = 90000
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   60000   90000   90000   92927  100000  210000
# Dichotomise Price at its median (90000, see summary(Price)): group "1" is
# below the median and group "2" is at or above it. This matches the plot
# legend and the median splits used for the other covariates.
# The outer breaks are infinite so that every price falls into an interval.
# The previous finite upper break (171000) was below the maximum price
# (210000) and turned that observation into NA.
# NOTE(review): output rendered below this chunk came from the old breaks
# (21000, 80000, 171000) and has to be regenerated.
idapreco <- cut(dados$Price, breaks = c(-Inf, 90000, Inf),
                labels = c("1", "2"), right = FALSE)
# Kaplan-Meier curves for the two price groups.
KMpreco2 <- survfit(Surv(tempos, cens) ~ idapreco, data = dados)
KMpreco2
## Call: survfit(formula = Surv(tempos, cens) ~ idapreco, data = dados)
## 
##    1 observation deleted due to missingness 
##               n events median 0.95LCL 0.95UCL
## idapreco=1   98     34     21      14      25
## idapreco=2 2561    305     NA      NA      NA
# Survival curves for the two price groups, without confidence bands.
plot(KMpreco2, conf.int = FALSE, lty = 1:2, col = 2:3,
     xlab = "Dias", ylab = "S(t)",
     main = "Curvas de sobrevivências com base no preço")
# Legend anchored at (1, 0.3) in plot coordinates.
legend(x = 1, y = 0.3, legend = c("1 < mediana", "2 >= mediana"),
       lty = 1:2, col = 2:3, lwd = 1, bty = "n")

# Log-rank test (rho = 0) comparing the survival curves of the two price groups.
survdiff(Surv(tempos,cens)~idapreco, data = dados, rho= 0)
## Call:
## survdiff(formula = Surv(tempos, cens) ~ idapreco, data = dados, 
##     rho = 0)
## 
## n=2659, 1 observation deleted due to missingness.
## 
##               N Observed Expected (O-E)^2/E (O-E)^2/V
## idapreco=1   98       34     10.3      54.2      56.7
## idapreco=2 2561      305    328.7       1.7      56.7
## 
##  Chisq= 56.7  on 1 degrees of freedom, p= 5e-14

N_fotos

summary(N_Photos) # median = 30
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4.00   28.00   30.00   29.62   30.00   45.00
# Two photo-count groups with left-closed intervals:
# [4, 30) -> "1" (below the median) and [30, 46) -> "2".
idafotos <- cut(
  dados$N_Photos,
  breaks = c(4, 30, 46),
  labels = c("1", "2"),
  right = FALSE
)
# Sanity check: one group label per row of `dados`.
length(idafotos)
## [1] 2660
# Kaplan-Meier curves for the two photo-count groups.
KMfoto <- survfit(formula = Surv(tempos, cens) ~ idafotos, data = dados)
print(KMfoto)
## Call: survfit(formula = Surv(tempos, cens) ~ idafotos, data = dados)
## 
##               n events median 0.95LCL 0.95UCL
## idafotos=1 1284    209     NA      NA      NA
## idafotos=2 1376    130     NA      NA      NA
# Survival curves for the two photo-count groups, without confidence bands.
plot(KMfoto, conf.int = FALSE, lty = 1:2, col = 2:3,
     xlab = "Dias", ylab = "S(t)",
     main = "Curva de Sobrevivência pelo número de fotos")
# Legend anchored at (1, 0.3) in plot coordinates.
legend(x = 1, y = 0.3, legend = c("1 < mediana", "2 >= mediana"),
       lty = 1:2, col = 2:3, lwd = 1, bty = "n")

# Log-rank test (rho = 0) comparing the two photo-count groups.
survdiff(Surv(tempos,cens)~idafotos, data = dados, rho= 0)
## Call:
## survdiff(formula = Surv(tempos, cens) ~ idafotos, data = dados, 
##     rho = 0)
## 
##               N Observed Expected (O-E)^2/E (O-E)^2/V
## idafotos=1 1284      209      157      17.2      32.5
## idafotos=2 1376      130      182      14.8      32.5
## 
##  Chisq= 32.5  on 1 degrees of freedom, p= 1e-08

N_Dialogos

#---------------------------------------------N_inquires
summary(N_Inquires)# median = 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    4.00    8.00   12.22   15.00  306.00
# Two inquiry-count groups with left-closed intervals:
# [1, 8) -> "1" (below the median) and [8, 307) -> "2".
idainqui <- cut(
  dados$N_Inquires,
  breaks = c(1, 8, 307),
  labels = c("1", "2"),
  right = FALSE
)
# Kaplan-Meier curves for the two inquiry-count groups.
KMinqui <- survfit(formula = Surv(tempos, cens) ~ idainqui, data = dados)
print(KMinqui)
## Call: survfit(formula = Surv(tempos, cens) ~ idainqui, data = dados)
## 
##               n events median 0.95LCL 0.95UCL
## idainqui=1 1279    261     NA      NA      NA
## idainqui=2 1381     78     NA      NA      NA
# Survival curves for the two inquiry-count groups, without confidence bands.
plot(KMinqui, conf.int = FALSE, lty = 1:2, col = 2:3,
     xlab = "Dias", ylab = "S(t)",
     main = "Curva de Sobrevida pelo número de Inqueritos")
# Legend anchored at (1, 0.3) in plot coordinates.
legend(x = 1, y = 0.3, legend = c("1 < mediana", "2 >= mediana"),
       lty = 1:2, col = 2:3, lwd = 1, bty = "n")

# Log-rank test (rho = 0) comparing the two inquiry-count groups.
survdiff(Surv(tempos,cens)~idainqui, data = dados, rho= 0)
## Call:
## survdiff(formula = Surv(tempos, cens) ~ idainqui, data = dados, 
##     rho = 0)
## 
##               N Observed Expected (O-E)^2/E (O-E)^2/V
## idainqui=1 1279      261      145      92.6       166
## idainqui=2 1381       78      194      69.3       166
## 
##  Chisq= 166  on 1 degrees of freedom, p= <2e-16

Distância percorrida

summary(Distance_travelled)# median = 90512 (as rounded by summary())
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6560   71518   90512   90122  108016  181326
# Dichotomise Distance_travelled at its median (90512, see
# summary(Distance_travelled)): group "1" is below the median and group "2"
# is at or above it.
# The outer breaks are infinite. With right = FALSE the previous closed upper
# break (181326, the maximum) excluded the largest distance and produced an NA.
idadist <- cut(dados$Distance_travelled, breaks = c(-Inf, 90512, Inf),
               labels = c("1", "2"), right = FALSE)
# Kaplan-Meier curves by distance group. The earlier call stratified by
# `idainqui` (the N_Inquires grouping), so it reproduced the inquiries table.
# NOTE(review): output rendered below this chunk came from that earlier call
# and has to be regenerated.
KMdist <- survfit(Surv(tempos, cens) ~ idadist, data = dados)
KMdist
## Call: survfit(formula = Surv(tempos, cens) ~ idainqui, data = dados)
## 
##               n events median 0.95LCL 0.95UCL
## idainqui=1 1279    261     NA      NA      NA
## idainqui=2 1381     78     NA      NA      NA
# Survival curves stored in KMdist, without confidence bands.
plot(KMdist, conf.int = FALSE, lty = 1:2, col = 2:3,
     xlab = "Dias", ylab = "S(t)",
     main = "Curva de Sobrevida pela distáncia percorrida")
# Legend anchored at (1, 0.3) in plot coordinates.
legend(x = 1, y = 0.3, legend = c("1 < mediana", "2 >= mediana"),
       lty = 1:2, col = 2:3, lwd = 1, bty = "n")

# Log-rank test (rho = 0) comparing the two distance groups.
survdiff(Surv(tempos,cens)~idadist, data = dados, rho= 0)
## Call:
## survdiff(formula = Surv(tempos, cens) ~ idadist, data = dados, 
##     rho = 0)
## 
## n=2659, 1 observation deleted due to missingness.
## 
##              N Observed Expected (O-E)^2/E (O-E)^2/V
## idadist=1 1330      206      166      9.57      19.1
## idadist=2 1329      133      173      9.20      19.1
## 
##  Chisq= 19.1  on 1 degrees of freedom, p= 1e-05

Métodos Paramétricos

Escolha da Distribuição

# Intercept-only parametric fits used to choose a distribution for the
# failure times. `data = dados` is passed explicitly, as in the other
# survreg() calls of this script, instead of relying on attach().
ajust1 <- survreg(Surv(tempos, cens) ~ 1, data = dados, dist = "exponential")
alpha1 <- exp(ajust1$coefficients[1])   # exponential scale: S(t) = exp(-t / alpha1)
ajust2 <- survreg(Surv(tempos, cens) ~ 1, data = dados, dist = "weibull")
alpha2 <- exp(ajust2$coefficients[1])   # Weibull scale
gama <- 1 / ajust2$scale                # Weibull shape
ajust3 <- survreg(Surv(tempos, cens) ~ 1, data = dados, dist = "lognorm")
# Log-normal location and scale taken from the fit. The earlier version
# hard-coded 4.778 and 2.0347 (presumably rounded copies of these estimates),
# which silently went stale whenever the data changed.
mu_ln <- ajust3$coefficients[[1]]
sigma_ln <- ajust3$scale

# S(t): Kaplan-Meier versus the three parametric estimates, evaluated at the
# Kaplan-Meier time points.
ekm <- survfit(Surv(tempos, cens) ~ 1, data = dados)
time <- ekm$time
st <- ekm$surv
ste <- exp(-time / alpha1)
stw <- exp(-(time / alpha2)^gama)
stln <- pnorm((-log(time) + mu_ln) / sigma_ln)
cbind(time, st, ste, stw, stln)
##       time        st       ste       stw      stln
##  [1,]  0.1 0.9962406 0.9989361 0.9984058 0.9997492
##  [2,]  1.0 0.9903758 0.9894119 0.9868017 0.9905693
##  [3,]  2.0 0.9822245 0.9789359 0.9751655 0.9776568
##  [4,]  3.0 0.9667429 0.9685708 0.9641332 0.9647216
##  [5,]  4.0 0.9577290 0.9583154 0.9535150 0.9522360
##  [6,]  5.0 0.9472150 0.9481687 0.9432214 0.9402959
##  [7,]  6.0 0.9270292 0.9381293 0.9331998 0.9289012
##  [8,]  7.0 0.8969866 0.9281963 0.9234151 0.9180230
##  [9,]  8.0 0.8820464 0.9183684 0.9138422 0.9076246
## [10,]  9.0 0.8722798 0.9086446 0.9044619 0.8976691
## [11,] 10.0 0.8637474 0.8990238 0.8952592 0.8881217
## [12,] 11.0 0.8580319 0.8895048 0.8862221 0.8789506
## [13,] 12.0 0.8495289 0.8800866 0.8773404 0.8701274
## [14,] 13.0 0.8445268 0.8707682 0.8686055 0.8616262
## [15,] 14.0 0.8327469 0.8615484 0.8600101 0.8534241
## [16,] 15.0 0.8307451 0.8524262 0.8515477 0.8455004
## [17,] 16.0 0.8263262 0.8434006 0.8432126 0.8378366
## [18,] 17.0 0.8239345 0.8344706 0.8349999 0.8304159
## [19,] 18.0 0.8174264 0.8256351 0.8269050 0.8232231
## [20,] 19.0 0.8174264 0.8168931 0.8189237 0.8162446
## [21,] 20.0 0.8158077 0.8082438 0.8110523 0.8094677
## [22,] 21.0 0.8069595 0.7996860 0.8032873 0.8028811
## [23,] 22.0 0.8029347 0.7912188 0.7956256 0.7964744
## [24,] 23.0 0.8029347 0.7828413 0.7880642 0.7902379
## [25,] 24.0 0.8004565 0.7745525 0.7806004 0.7841630
## [26,] 25.0 0.7948195 0.7663514 0.7732316 0.7782414
## [27,] 26.0 0.7948195 0.7582372 0.7659555 0.7724656
## [28,] 27.0 0.7948195 0.7502089 0.7587697 0.7668288
## [29,] 28.0 0.7910347 0.7422656 0.7516722 0.7613244
## [30,] 29.0 0.7867818 0.7344064 0.7446609 0.7559465
## [31,] 30.0 0.7867818 0.7266304 0.7377339 0.7506895
## [32,] 31.0 0.7867818 0.7189367 0.7308894 0.7455482
## [33,] 32.0 0.7867818 0.7113245 0.7241257 0.7405177
## [34,] 33.0 0.7867818 0.7037930 0.7174412 0.7355934
## [35,] 34.0 0.7867818 0.6963411 0.7108342 0.7307712
## [36,] 35.0 0.7867818 0.6889682 0.7043033 0.7260468
## [37,] 36.0 0.7867818 0.6816733 0.6978470 0.7214167
## [38,] 37.0 0.7867818 0.6744556 0.6914639 0.7168771
## [39,] 38.0 0.7746775 0.6673144 0.6851528 0.7124248
## [40,] 39.0 0.7746775 0.6602488 0.6789122 0.7080566
## [41,] 40.0 0.7746775 0.6532580 0.6727410 0.7037696
## [42,] 41.0 0.7746775 0.6463412 0.6666380 0.6995607
## [43,] 42.0 0.7746775 0.6394977 0.6606020 0.6954275
## [44,] 43.0 0.7746775 0.6327266 0.6546320 0.6913673
## [45,] 44.0 0.7746775 0.6260272 0.6487267 0.6873777
## [46,] 45.0 0.7746775 0.6193988 0.6428853 0.6834564
## [47,] 46.0 0.7746775 0.6128405 0.6371066 0.6796013
## [48,] 47.0 0.7746775 0.6063517 0.6313897 0.6758102
## [49,] 48.0 0.7746775 0.5999315 0.6257336 0.6720812
## [50,] 49.0 0.7746775 0.5935794 0.6201375 0.6684123
## [51,] 50.0 0.7746775 0.5872945 0.6146003 0.6648019
## [52,] 51.0 0.7746775 0.5810761 0.6091212 0.6612481
## [53,] 52.0 0.7746775 0.5749236 0.6036994 0.6577493
## [54,] 54.0 0.7746775 0.5628134 0.5930243 0.6509106
## [55,] 56.0 0.7746775 0.5509582 0.5825686 0.6442738
## [56,] 57.0 0.7746775 0.5451246 0.5774210 0.6410278
## [57,] 59.0 0.7746775 0.5336420 0.5672828 0.6346740
## [58,] 60.0 0.7746775 0.5279917 0.5622908 0.6315639
## [59,] 61.0 0.7746775 0.5224013 0.5573492 0.6284967
## [60,] 63.0 0.7746775 0.5113973 0.5476148 0.6224870
## [61,] 66.0 0.7746775 0.4953245 0.5333751 0.6137689

Escolhendo a melhor distribuição para o modelo de sobrevivência pelo teste da razão de verossimilhanças (TRV)

# Likelihood-ratio tests (TRV) of each candidate distribution against the
# generalized gamma fit, which has one more parameter than the log-normal and
# Weibull fits and two more than the exponential fit.
x1 <- survreg(Surv(tempos, cens) ~ 1, data = dados, dist = "lognormal")   # log-normal: 2 parameters
x2 <- survreg(Surv(tempos, cens) ~ 1, data = dados, dist = "exponential") # exponential: 1 parameter
x3 <- survreg(Surv(tempos, cens) ~ 1, data = dados, dist = "weibull")     # Weibull: 2 parameters
alpha.e <- exp(x2$coefficients[1])
alpha.w <- exp(x3$coefficients[1])
gama <- 1 / x3$scale
x4 <- flexsurvreg(Surv(tempos, cens) ~ 1, data = dados, dist = "gengamma") # generalized gamma: 3 parameters
# TRV = 2 * (loglik of generalized gamma - loglik of restricted model);
# degrees of freedom = 3 - number of parameters of the restricted model.
# The upper tail is requested directly instead of computing 1 - pchisq().
TRV1 <- 2 * (x4$loglik - x1$loglik[2])
modelo1 <- pchisq(TRV1, df = 1, lower.tail = FALSE) # log-normal
TRV2 <- 2 * (x4$loglik - x2$loglik[2])
modelo2 <- pchisq(TRV2, df = 2, lower.tail = FALSE) # exponential
TRV3 <- 2 * (x4$loglik - x3$loglik[2])
modelo3 <- pchisq(TRV3, df = 1, lower.tail = FALSE) # Weibull
# Labels follow the order of `modelos` (modelo1 = log-normal, modelo2 =
# exponential, modelo3 = Weibull). The earlier order
# c("exp", "weibull", "lognormal") attached the log-normal p-value to "exp".
distri <- c("lognormal", "exp", "weibull");distri
## [1] "lognormal" "exp"       "weibull"
modelos <- c(modelo1, modelo2, modelo3);modelos
## [1] 8.939157e-01 8.088587e-07 7.059712e-07
Gráfico de Kaplan-Meier vs distribuições
# Back to a single panel per page.
par(mfrow = c(1, 1))

# Draw the Kaplan-Meier step curve (solid) and overlay one fitted parametric
# survival curve (dashed), anchored at S(0) = 1.
sobrepor_ajuste <- function(s_ajustada, titulo, rotulo) {
  plot(ekm, conf.int = FALSE, xlab = "Tempos", ylab = "S(t)", main = titulo)
  lines(c(0, time), c(1, s_ajustada), lty = 2)
  legend(x = 3, y = 0.4, legend = c("Kaplan-Meier", rotulo),
         lty = c(1, 2), bty = "n", cex = 0.8)
}

sobrepor_ajuste(ste, "Kaplan-Meier vs exponencial", "exponencial")

sobrepor_ajuste(stw, "Kaplan-Meier vs Weibull", "Weibull")

sobrepor_ajuste(stln, "Kaplan-Meiervs log-normal", "Log-normal")

Collett para escolha do melhor modelo

Passo 1

1. Ajustar todos os modelos contendo uma única covariável. Incluir
todas as covariáveis que forem significativas ao nível de 0,10.
É aconselhável utilizar o teste da razão de verossimilhanças neste passo.
# Step 1 of the selection strategy: log-normal models with a single
# covariate, starting from the intercept-only reference fit.
ajuste1 <- survreg(Surv(tempos, cens) ~ 1, data = dados, dist = "lognorm")
ajuste2 <- survreg(Surv(tempos, cens) ~ Distance_travelled, data = dados, dist = "lognorm")
# Summarise the Distance_travelled model. The earlier call was
# summary(ajust2), i.e. the intercept-only Weibull fit from the
# distribution-selection step.
# NOTE(review): the output rendered below this chunk therefore does not
# describe `ajuste2` and has to be regenerated.
summary(ajuste2)
## 
## Call:
## survreg(formula = Surv(tempos, cens) ~ 1, dist = "weibull")
##              Value Std. Error     z      p
## (Intercept) 4.6941     0.1055 44.49 <2e-16
## Log(scale)  0.0828     0.0454  1.82  0.068
## 
## Scale= 1.09 
## 
## Weibull distribution
## Loglik(model)= -1877.2   Loglik(intercept only)= -1877.2
## Number of Newton-Raphson Iterations: 10 
## n= 2660
# Single-covariate log-normal model: N_Photos.
ajuste3 <- survreg(
  Surv(tempos, cens) ~ N_Photos,
  data = dados,
  dist = "lognorm"
)
summary(ajuste3)
## 
## Call:
## survreg(formula = Surv(tempos, cens) ~ N_Photos, data = dados, 
##     dist = "lognorm")
##               Value Std. Error     z       p
## (Intercept) -0.7709     0.7631 -1.01    0.31
## N_Photos     0.1862     0.0264  7.05 1.8e-12
## Log(scale)   0.6783     0.0423 16.03 < 2e-16
## 
## Scale= 1.97 
## 
## Log Normal distribution
## Loglik(model)= -1839.5   Loglik(intercept only)= -1865
##  Chisq= 50.99 on 1 degrees of freedom, p= 9.3e-13 
## Number of Newton-Raphson Iterations: 5 
## n= 2660
# Single-covariate log-normal model: N_Inquires.
ajuste4 <- survreg(
  Surv(tempos, cens) ~ N_Inquires,
  data = dados,
  dist = "lognorm"
)
summary(ajuste4)
## 
## Call:
## survreg(formula = Surv(tempos, cens) ~ N_Inquires, data = dados, 
##     dist = "lognorm")
##               Value Std. Error    z      p
## (Intercept) 3.49654    0.10763 32.5 <2e-16
## N_Inquires  0.10473    0.00891 11.8 <2e-16
## Log(scale)  0.55693    0.04185 13.3 <2e-16
## 
## Scale= 1.75 
## 
## Log Normal distribution
## Loglik(model)= -1758.2   Loglik(intercept only)= -1865
##  Chisq= 213.47 on 1 degrees of freedom, p= 2.4e-48 
## Number of Newton-Raphson Iterations: 5 
## n= 2660
# Single-covariate log-normal model: Price.
ajuste5 <- survreg(
  Surv(tempos, cens) ~ Price,
  data = dados,
  dist = "lognorm"
)
summary(ajuste5)
## 
## Call:
## survreg(formula = Surv(tempos, cens) ~ Price, data = dados, dist = "lognorm")
##                Value Std. Error     z       p
## (Intercept) 1.85e+00   4.59e-01  4.03 5.5e-05
## Price       3.13e-05   5.08e-06  6.16 7.1e-10
## Log(scale)  6.88e-01   4.24e-02 16.25 < 2e-16
## 
## Scale= 1.99 
## 
## Log Normal distribution
## Loglik(model)= -1844   Loglik(intercept only)= -1865
##  Chisq= 41.91 on 1 degrees of freedom, p= 9.6e-11 
## Number of Newton-Raphson Iterations: 5 
## n= 2660
# Single-covariate log-normal model: Color (factor).
# NOTE(review): the Kaplan-Meier table by colour lists Beige, Brown and Purple
# with no events; presumably that explains the very large standard errors in
# this fit. Consider merging the rare colours; confirm.
ajuste6 <- survreg(
  Surv(tempos, cens) ~ Color,
  data = dados,
  dist = "lognorm"
)
summary(ajuste6)
## 
## Call:
## survreg(formula = Surv(tempos, cens) ~ Color, data = dados, dist = "lognorm")
##                 Value Std. Error     z      p
## (Intercept)   12.8322   813.3194  0.02   0.99
## ColorBlack    -7.9480   813.3194 -0.01   0.99
## ColorBlue     -8.0969   813.3194 -0.01   0.99
## ColorBrown    -4.7233  1429.9126  0.00   1.00
## ColorGray     -7.9780   813.3195 -0.01   0.99
## ColorGreen    -7.6169   813.3195 -0.01   0.99
## ColorMaroon   -9.6240   813.3196 -0.01   0.99
## ColorPearl    -8.1915   813.3194 -0.01   0.99
## ColorPink     -6.5785   813.3195 -0.01   0.99
## ColorPurple   -1.0345  1429.9126  0.00   1.00
## ColorRed      -8.0493   813.3194 -0.01   0.99
## ColorSilver   -8.2466   813.3194 -0.01   0.99
## ColorWhite    -8.3344   813.3194 -0.01   0.99
## ColorYellow   -7.0512   813.3197 -0.01   0.99
## Log(scale)     0.6959     0.0424 16.41 <2e-16
## 
## Scale= 2.01 
## 
## Log Normal distribution
## Loglik(model)= -1850.6   Loglik(intercept only)= -1865
##  Chisq= 28.79 on 13 degrees of freedom, p= 0.007 
## Number of Newton-Raphson Iterations: 12 
## n= 2660
# Single-covariate log-normal model: Country_Sold (factor).
ajuste7 <- survreg(
  Surv(tempos, cens) ~ Country_Sold,
  data = dados,
  dist = "lognorm"
)
summary(ajuste7)
## 
## Call:
## survreg(formula = Surv(tempos, cens) ~ Country_Sold, data = dados, 
##     dist = "lognorm")
##                                          Value Std. Error     z       p
## (Intercept)                           3.10e+00   6.82e-01  4.54 5.5e-06
## Country_SoldAruba                     1.06e+01   1.09e+03  0.01  0.9922
## Country_SoldBahamas                   1.30e+00   1.31e+00  1.00  0.3190
## Country_SoldBotswana                  1.03e+01   1.57e+03  0.01  0.9948
## Country_SoldBrunei                    1.02e+00   1.07e+00  0.95  0.3408
## Country_SoldBurundi                  -5.04e-01   1.63e+00 -0.31  0.7576
## Country_SoldCambodia                  9.65e-01   7.02e-01  1.38  0.1690
## Country_SoldCayman Islands           -2.00e+00   2.02e+00 -0.99  0.3213
## Country_SoldChile                     3.38e-01   7.83e-01  0.43  0.6665
## Country_SoldCommonwealth Of Dominica  9.14e+00   0.00e+00   Inf < 2e-16
## Country_SoldD.R.Congo                 1.05e+01   1.58e+03  0.01  0.9947
## Country_SoldGuatemala                 5.00e-01   1.11e+00  0.45  0.6515
## Country_SoldIndonesia                 3.15e-01   8.10e-01  0.39  0.6978
## Country_SoldKazakhstan                5.23e+00   0.00e+00   Inf < 2e-16
## Country_SoldLaos                      2.02e+00   6.87e-01  2.93  0.0033
## Country_SoldLiberia                   9.93e+00   0.00e+00   Inf < 2e-16
## Country_SoldMalawi                    9.22e-01   8.55e-01  1.08  0.2807
## Country_SoldMalaysia                  1.17e+00   7.73e-01  1.51  0.1313
## Country_SoldMicro