library(readr)
datos <- read_csv("examen.csv")
str(datos)
Classes 'tbl_df', 'tbl' and 'data.frame':   100 obs. of  6 variables:
 $ id         : int  1 31 56 85 98 53 4 30 61 78 ...
 $ tiemposeg  : int  6 6 14 44 62 89 98 104 107 114 ...
 $ estadovital: int  1 1 1 1 1 1 1 1 1 1 ...
 $ edad       : int  65 72 64 71 86 87 81 85 90 80 ...
 $ sexo       : int  1 1 0 1 0 0 0 0 1 1 ...
 $ imccat     : int  2 1 2 2 2 2 2 1 2 2 ...
 - attr(*, "spec")=List of 2
  ..$ cols   :List of 6
  .. ..$ id         : list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  .. ..$ tiemposeg  : list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  .. ..$ estadovital: list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  .. ..$ edad       : list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  .. ..$ sexo       : list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  .. ..$ imccat     : list()
  .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
  ..$ default: list()
  .. ..- attr(*, "class")= chr  "collector_guess" "collector"
  ..- attr(*, "class")= chr "col_spec"
datos$imccat <- factor(datos$imccat, levels = 0:2, labels = c("Normal", "Sobrepeso", "Obesidad"))
table(unclass(datos$imccat), datos$imccat)
   
    Normal Sobrepeso Obesidad
  1     26         0        0
  2      0        35        0
  3      0         0       39
datos$imccat = relevel(datos$imccat, ref=1)

datos$sexo <- factor(datos$sexo, levels = 0:1, labels = c("Masculino", "Femenino"))
table(unclass(datos$sexo), datos$sexo)
   
    Masculino Femenino
  1        35        0
  2         0       65
table(datos$sexo, datos$estadovital)
           
             0  1
  Masculino 11 24
  Femenino  36 29
datos$sexo = relevel(datos$sexo, ref=2)

summary(datos)
       id           tiemposeg     estadovital        edad      
 Min.   :  1.00   Min.   :   6   Min.   :0.00   Min.   :32.00  
 1st Qu.: 25.75   1st Qu.: 715   1st Qu.:0.00   1st Qu.:59.75  
 Median : 50.50   Median :1878   Median :1.00   Median :71.00  
 Mean   : 50.50   Mean   :1505   Mean   :0.53   Mean   :68.25  
 3rd Qu.: 75.25   3rd Qu.:2076   3rd Qu.:1.00   3rd Qu.:80.25  
 Max.   :100.00   Max.   :2719   Max.   :1.00   Max.   :92.00  
        sexo          imccat  
 Femenino :65   Normal   :26  
 Masculino:35   Sobrepeso:35  
                Obesidad :39  
                              
                              
                              
library(survival)
survobj <- with(datos, Surv(tiemposeg,estadovital))
survobj
  [1]    6     6    14    44    62    89    98   104   107   114   123 
 [12]  128   148   182   187   189   274   274   302   363   374   451 
 [23]  461   492   538   774   841   936  1002  1011  1048  1054  1172 
 [34] 1205  1278  1401  1497  1557  1577  1624  1669  1806  1836  1836+
 [45] 1846+ 1859+ 1860+ 1870+ 1874  1876+ 1879+ 1883+ 1889+ 1907  1912+
 [56] 1916+ 1922+ 1923+ 1929+ 1934+ 1939+ 1939+ 1969+ 1984+ 1993+ 2003+
 [67] 2012  2013+ 2031  2052+ 2054+ 2061+ 2065  2072+ 2074+ 2084+ 2114+
 [78] 2124+ 2137+ 2137+ 2145+ 2157  2173+ 2174+ 2183+ 2190+ 2201  2421 
 [89] 2573+ 2574+ 2578+ 2595+ 2610+ 2613  2624+ 2631+ 2638+ 2641+ 2710 
[100] 2719+
# Plot survival distribution of the total sample
# Kaplan-Meier estimator
fit0 <- survfit(survobj~1, data=datos)
summary(fit0)
Call: survfit(formula = survobj ~ 1, data = datos)

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    6    100       2    0.980  0.0140       0.9529        1.000
   14     98       1    0.970  0.0171       0.9371        1.000
   44     97       1    0.960  0.0196       0.9224        0.999
   62     96       1    0.950  0.0218       0.9082        0.994
   89     95       1    0.940  0.0237       0.8946        0.988
   98     94       1    0.930  0.0255       0.8813        0.981
  104     93       1    0.920  0.0271       0.8683        0.975
  107     92       1    0.910  0.0286       0.8556        0.968
  114     91       1    0.900  0.0300       0.8431        0.961
  123     90       1    0.890  0.0313       0.8307        0.953
  128     89       1    0.880  0.0325       0.8186        0.946
  148     88       1    0.870  0.0336       0.8065        0.938
  182     87       1    0.860  0.0347       0.7946        0.931
  187     86       1    0.850  0.0357       0.7828        0.923
  189     85       1    0.840  0.0367       0.7711        0.915
  274     84       2    0.820  0.0384       0.7481        0.899
  302     82       1    0.810  0.0392       0.7366        0.891
  363     81       1    0.800  0.0400       0.7253        0.882
  374     80       1    0.790  0.0407       0.7141        0.874
  451     79       1    0.780  0.0414       0.7029        0.866
  461     78       1    0.770  0.0421       0.6918        0.857
  492     77       1    0.760  0.0427       0.6807        0.848
  538     76       1    0.750  0.0433       0.6698        0.840
  774     75       1    0.740  0.0439       0.6588        0.831
  841     74       1    0.730  0.0444       0.6480        0.822
  936     73       1    0.720  0.0449       0.6372        0.814
 1002     72       1    0.710  0.0454       0.6264        0.805
 1011     71       1    0.700  0.0458       0.6157        0.796
 1048     70       1    0.690  0.0462       0.6051        0.787
 1054     69       1    0.680  0.0466       0.5945        0.778
 1172     68       1    0.670  0.0470       0.5839        0.769
 1205     67       1    0.660  0.0474       0.5734        0.760
 1278     66       1    0.650  0.0477       0.5629        0.751
 1401     65       1    0.640  0.0480       0.5525        0.741
 1497     64       1    0.630  0.0483       0.5421        0.732
 1557     63       1    0.620  0.0485       0.5318        0.723
 1577     62       1    0.610  0.0488       0.5215        0.713
 1624     61       1    0.600  0.0490       0.5113        0.704
 1669     60       1    0.590  0.0492       0.5011        0.695
 1806     59       1    0.580  0.0494       0.4909        0.685
 1836     58       1    0.570  0.0495       0.4808        0.676
 1874     52       1    0.559  0.0498       0.4696        0.666
 1907     47       1    0.547  0.0501       0.4573        0.655
 2012     34       1    0.531  0.0511       0.4397        0.641
 2031     32       1    0.514  0.0522       0.4217        0.628
 2065     28       1    0.496  0.0534       0.4017        0.613
 2157     19       1    0.470  0.0566       0.3711        0.595
 2201     14       1    0.436  0.0618       0.3307        0.576
 2421     13       1    0.403  0.0655       0.2929        0.554
 2613      7       1    0.345  0.0774       0.2225        0.536
 2710      2       1    0.173  0.1281       0.0403        0.739
plot(fit0, xlab="Survival Time in Days",
   ylab="% Surviving", yscale=100,
   main="Survival Distribution (Overall)")

# Compare the survival distributions of IMC groups
fit1 <- survfit(survobj~imccat,data=datos, conf.type='log')
summary(fit1)
Call: survfit(formula = survobj ~ imccat, data = datos, conf.type = "log")

                imccat=Normal 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
 1002     26       1    0.962  0.0377        0.890        1.000
 1048     25       1    0.923  0.0523        0.826        1.000
 1054     24       1    0.885  0.0627        0.770        1.000
 1205     23       1    0.846  0.0708        0.718        0.997
 1624     22       1    0.808  0.0773        0.670        0.974
 1669     21       1    0.769  0.0826        0.623        0.949
 2031     10       1    0.692  0.1042        0.515        0.930
 2157      6       1    0.577  0.1365        0.363        0.917
 2613      3       1    0.385  0.1815        0.153        0.970

                imccat=Sobrepeso 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    6     35       1    0.971  0.0282        0.918        1.000
  104     34       1    0.943  0.0392        0.869        1.000
  189     33       1    0.914  0.0473        0.826        1.000
  274     32       1    0.886  0.0538        0.786        0.998
  302     31       1    0.857  0.0591        0.749        0.981
  363     30       1    0.829  0.0637        0.713        0.963
  774     29       1    0.800  0.0676        0.678        0.944
  936     28       1    0.771  0.0710        0.644        0.924
 1011     27       1    0.743  0.0739        0.611        0.903
 1172     26       1    0.714  0.0764        0.579        0.881
 1836     25       1    0.686  0.0785        0.548        0.858
 1907     19       1    0.650  0.0822        0.507        0.833
 2065     15       1    0.606  0.0874        0.457        0.804
 2201      8       1    0.531  0.1043        0.361        0.780
 2421      7       1    0.455  0.1136        0.279        0.742
 2710      1       1    0.000     NaN           NA           NA

                imccat=Obesidad 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
    6     39       1    0.974  0.0253        0.926        1.000
   14     38       1    0.949  0.0353        0.882        1.000
   44     37       1    0.923  0.0427        0.843        1.000
   62     36       1    0.897  0.0486        0.807        0.998
   89     35       1    0.872  0.0535        0.773        0.983
   98     34       1    0.846  0.0578        0.740        0.967
  107     33       1    0.821  0.0615        0.708        0.950
  114     32       1    0.795  0.0647        0.678        0.932
  123     31       1    0.769  0.0675        0.648        0.914
  128     30       1    0.744  0.0699        0.618        0.894
  148     29       1    0.718  0.0721        0.590        0.874
  182     28       1    0.692  0.0739        0.562        0.853
  187     27       1    0.667  0.0755        0.534        0.832
  274     26       1    0.641  0.0768        0.507        0.811
  374     25       1    0.615  0.0779        0.480        0.789
  451     24       1    0.590  0.0788        0.454        0.766
  461     23       1    0.564  0.0794        0.428        0.743
  492     22       1    0.538  0.0798        0.403        0.720
  538     21       1    0.513  0.0800        0.378        0.696
  841     20       1    0.487  0.0800        0.353        0.672
 1278     19       1    0.462  0.0798        0.329        0.648
 1401     18       1    0.436  0.0794        0.305        0.623
 1497     17       1    0.410  0.0788        0.282        0.598
 1557     16       1    0.385  0.0779        0.259        0.572
 1577     15       1    0.359  0.0768        0.236        0.546
 1806     14       1    0.333  0.0755        0.214        0.520
 1874     13       1    0.308  0.0739        0.192        0.493
 2012      8       1    0.269  0.0740        0.157        0.461
fit1
Call: survfit(formula = survobj ~ imccat, data = datos, conf.type = "log")

                  n events median 0.95LCL 0.95UCL
imccat=Normal    26      9   2613    2157      NA
imccat=Sobrepeso 35     16   2421    2065      NA
imccat=Obesidad  39     28    841     374    1874
# plot the survival distributions by sex
table(datos$imccat)

   Normal Sobrepeso  Obesidad 
       26        35        39 
plot(fit1, xlab="Survival Time in Days",
  ylab="% Surviving", yscale=100, col=c("green","blue", "red"),
  main="Survival Distributions by IMC")
abline(h = .5, col='gray')
  legend("bottomleft", title="IMC", c("Normal", "Sobrepeso", "Obesidad"),
  fill=c("green","blue", "red"))

# test for difference between IMC groups
# survival curves (logrank test)
survdiff(survobj~imccat, data=datos)
Call:
survdiff(formula = survobj ~ imccat, data = datos)

                  N Observed Expected (O-E)^2/E (O-E)^2/V
imccat=Normal    26        9     16.6      3.51      5.20
imccat=Sobrepeso 35       16     21.0      1.18      1.98
imccat=Obesidad  39       28     15.4     10.38     14.95

 Chisq= 15.4  on 2 degrees of freedom, p= 0.000446 
mod1 <- coxph(survobj~imccat,data=datos)

# display results
summary(mod1)
Call:
coxph(formula = survobj ~ imccat, data = datos)

  n= 100, number of events= 53 

                  coef exp(coef) se(coef)     z Pr(>|z|)   
imccatSobrepeso 0.3490    1.4177   0.4177 0.835  0.40345   
imccatObesidad  1.2435    3.4678   0.3862 3.220  0.00128 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                exp(coef) exp(-coef) lower .95 upper .95
imccatSobrepeso     1.418     0.7054    0.6252     3.215
imccatObesidad      3.468     0.2884    1.6266     7.393

Concordance= 0.671  (se = 0.04 )
Rsquare= 0.132   (max possible= 0.987 )
Likelihood ratio test= 14.19  on 2 df,   p=0.0008302
Wald test            = 14.09  on 2 df,   p=0.0008718
Score (logrank) test = 15.43  on 2 df,   p=0.0004464
# Modelo completo
mod2 <- coxph(survobj~imccat + sexo + edad,data=datos)

# display results
summary(mod2)
Call:
coxph(formula = survobj ~ imccat + sexo + edad, data = datos)

  n= 100, number of events= 53 

                    coef exp(coef) se(coef)      z Pr(>|z|)   
imccatSobrepeso -0.05209   0.94924  0.43717 -0.119  0.90515   
imccatObesidad   0.95026   2.58639  0.40373  2.354  0.01859 * 
sexoMasculino    0.16785   1.18276  0.30091  0.558  0.57697   
edad             0.03463   1.03523  0.01182  2.929  0.00341 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                exp(coef) exp(-coef) lower .95 upper .95
imccatSobrepeso    0.9492     1.0535    0.4030     2.236
imccatObesidad     2.5864     0.3866    1.1723     5.706
sexoMasculino      1.1828     0.8455    0.6558     2.133
edad               1.0352     0.9660    1.0115     1.060

Concordance= 0.709  (se = 0.043 )
Rsquare= 0.24   (max possible= 0.987 )
Likelihood ratio test= 27.47  on 4 df,   p=1.597e-05
Wald test            = 27.14  on 4 df,   p=1.861e-05
Score (logrank) test = 29.85  on 4 df,   p=5.248e-06
drop1(mod2, test='Chisq')
Single term deletions

Model:
survobj ~ imccat + sexo + edad
       Df    AIC     LRT Pr(>Chi)   
<none>    413.04                    
imccat  2 420.70 11.6601 0.002938 **
sexo    1 411.35  0.3101 0.577593   
edad    1 420.85  9.8036 0.001742 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Reduccion
mod3 <- coxph(survobj~imccat + edad,data=datos)

# display results
summary(mod3)
Call:
coxph(formula = survobj ~ imccat + edad, data = datos)

  n= 100, number of events= 53 

                    coef exp(coef) se(coef)      z Pr(>|z|)    
imccatSobrepeso -0.03650   0.96416  0.43601 -0.084  0.93328    
imccatObesidad   0.96181   2.61642  0.40249  2.390  0.01686 *  
edad             0.03695   1.03764  0.01109  3.333  0.00086 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                exp(coef) exp(-coef) lower .95 upper .95
imccatSobrepeso    0.9642     1.0372    0.4102     2.266
imccatObesidad     2.6164     0.3822    1.1888     5.758
edad               1.0376     0.9637    1.0153     1.060

Concordance= 0.708  (se = 0.043 )
Rsquare= 0.238   (max possible= 0.987 )
Likelihood ratio test= 27.16  on 3 df,   p=5.45e-06
Wald test            = 26.6  on 3 df,   p=7.129e-06
Score (logrank) test = 29.14  on 3 df,   p=2.091e-06
drop1(mod3, test='Chisq')
Single term deletions

Model:
survobj ~ imccat + edad
       Df    AIC    LRT  Pr(>Chi)    
<none>    411.35                     
imccat  2 419.06 11.703 0.0028759 ** 
edad    1 422.33 12.972 0.0003162 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Reduccion
mod4 <- coxph(survobj~imccat + imccat*sexo + edad,data=datos)

# display results
summary(mod4)
Call:
coxph(formula = survobj ~ imccat + imccat * sexo + edad, data = datos)

  n= 100, number of events= 53 

                                  coef exp(coef) se(coef)      z Pr(>|z|)
imccatSobrepeso                0.08229   1.08577  0.54915  0.150   0.8809
imccatObesidad                 1.05374   2.86837  0.49009  2.150   0.0315
sexoMasculino                  0.47529   1.60848  0.71419  0.665   0.5057
edad                           0.03508   1.03570  0.01190  2.948   0.0032
imccatSobrepeso:sexoMasculino -0.39875   0.67116  0.87317 -0.457   0.6479
imccatObesidad:sexoMasculino  -0.34560   0.70780  0.81642 -0.423   0.6721
                                
imccatSobrepeso                 
imccatObesidad                * 
sexoMasculino                   
edad                          **
imccatSobrepeso:sexoMasculino   
imccatObesidad:sexoMasculino    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                              exp(coef) exp(-coef) lower .95 upper .95
imccatSobrepeso                  1.0858     0.9210    0.3701     3.185
imccatObesidad                   2.8684     0.3486    1.0977     7.495
sexoMasculino                    1.6085     0.6217    0.3967     6.521
edad                             1.0357     0.9655    1.0118     1.060
imccatSobrepeso:sexoMasculino    0.6712     1.4900    0.1212     3.716
imccatObesidad:sexoMasculino     0.7078     1.4128    0.1429     3.506

Concordance= 0.708  (se = 0.043 )
Rsquare= 0.242   (max possible= 0.987 )
Likelihood ratio test= 27.69  on 6 df,   p=0.0001074
Wald test            = 27.01  on 6 df,   p=0.0001442
Score (logrank) test = 30.7  on 6 df,   p=2.892e-05
drop1(mod4, test='Chisq')
Single term deletions

Model:
survobj ~ imccat + imccat * sexo + edad
            Df    AIC    LRT Pr(>Chi)   
<none>         416.82                   
edad         1 424.69 9.8708 0.001679 **
imccat:sexo  2 413.04 0.2213 0.895255   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Reduccion
mod5 <- coxph(survobj~imccat + imccat*tiemposeg + edad*tiemposeg,data=datos)

# display results
summary(mod5)
Call:
coxph(formula = survobj ~ imccat + imccat * tiemposeg + edad * 
    tiemposeg, data = datos)

  n= 100, number of events= 53 

                                coef  exp(coef)   se(coef)      z Pr(>|z|)
imccatSobrepeso           -3.836e+00  2.158e-02  1.455e+01 -0.264  0.79212
imccatObesidad            -3.677e+00  2.529e-02  1.459e+01 -0.252  0.80099
tiemposeg                 -4.272e-01  6.523e-01  1.416e-01 -3.017  0.00256
edad                      -3.206e-02  9.685e-01  7.989e-02 -0.401  0.68823
imccatSobrepeso:tiemposeg  2.976e-03  1.003e+00  7.062e-03  0.421  0.67347
imccatObesidad:tiemposeg   3.404e-03  1.003e+00  7.170e-03  0.475  0.63492
tiemposeg:edad             7.030e-06  1.000e+00  4.432e-05  0.159  0.87397
                            
imccatSobrepeso             
imccatObesidad              
tiemposeg                 **
edad                        
imccatSobrepeso:tiemposeg   
imccatObesidad:tiemposeg    
tiemposeg:edad              
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                          exp(coef) exp(-coef) lower .95 upper .95
imccatSobrepeso             0.02158    46.3327 8.821e-15 5.281e+10
imccatObesidad              0.02529    39.5417 9.661e-15 6.620e+10
tiemposeg                   0.65231     1.5330 4.942e-01 8.610e-01
edad                        0.96845     1.0326 8.281e-01 1.133e+00
imccatSobrepeso:tiemposeg   1.00298     0.9970 9.892e-01 1.017e+00
imccatObesidad:tiemposeg    1.00341     0.9966 9.894e-01 1.018e+00
tiemposeg:edad              1.00001     1.0000 9.999e-01 1.000e+00

Concordance= 1  (se = 0.043 )
Rsquare= 0.986   (max possible= 0.987 )
Likelihood ratio test= 425.2  on 7 df,   p=0
Wald test            = 9.37  on 7 df,   p=0.2274
Score (logrank) test = 202.7  on 7 df,   p=0
drop1(mod5, test='Chisq')
Single term deletions

Model:
survobj ~ imccat + imccat * tiemposeg + edad * tiemposeg
                 Df    AIC      LRT Pr(>Chi)
<none>              21.344                  
imccat:tiemposeg  2 18.074  0.73033   0.6941
tiemposeg:edad    1 19.253 -0.09102   1.0000
# graphs for systematic trends
(a <- cox.zph(mod3))
                   rho chisq      p
imccatSobrepeso -0.242 3.500 0.0614
imccatObesidad  -0.335 5.890 0.0152
edad             0.121 0.961 0.3268
GLOBAL              NA 6.050 0.1092
par(mfrow = c(3, 1))
for(i in 1:3) plot(a[i])