library(survival)
## Loading required package: splines
UNMissions=read.csv("UNMissions.csv")

Question 1

To fit the model:

mod1=survreg(Surv(Duration, 1-Censored)~InterState, dist='lognormal', data=UNMissions)
summary(mod1)
## 
## Call:
## survreg(formula = Surv(Duration, 1 - Censored) ~ InterState, 
##     data = UNMissions, dist = "lognormal")
##             Value Std. Error     z        p
## (Intercept) 3.408      0.219 15.58 1.07e-54
## InterState  1.580      0.534  2.96 3.08e-03
## Log(scale)  0.329      0.119  2.76 5.76e-03
## 
## Scale= 1.39 
## 
## Log Normal distribution
## Loglik(model)= -197.6   Loglik(intercept only)= -201.8
##  Chisq= 8.37 on 1 degrees of freedom, p= 0.0038 
## Number of Newton-Raphson Iterations: 4 
## n= 54

The p-value for this term is less than .05 (.00308), so there appears to be a stastically significant relationship between InterState and Duration.

To plot the survival curves:

curve(1-plnorm(x, 3.408, 1.39), 0, 600)
curve(1-plnorm(x, 3.408+1.580, 1.39), 0, 600, add=T, col='red')

plot of chunk unnamed-chunk-3

Question 2

To create the new variable:

lexp=log(UNMissions$Expend)

The two models are

survreg(Surv(Duration, 1-Censored)~InterState+lexp+Deaths+Troops, data=UNMissions, dist='lognormal')
## Call:
## survreg(formula = Surv(Duration, 1 - Censored) ~ InterState + 
##     lexp + Deaths + Troops, data = UNMissions, dist = "lognormal")
## 
## Coefficients:
## (Intercept)  InterState        lexp      Deaths      Troops 
##  -4.3159360   0.5586332   0.4363407   0.0136525  -0.0001423 
## 
## Scale= 0.9526 
## 
## Loglik(model)= -151.1   Loglik(intercept only)= -169.8
##  Chisq= 37.28 on 4 degrees of freedom, p= 1.6e-07 
## n=42 (12 observations deleted due to missingness)

and

survreg(Surv(Duration, 1-Censored)~InterState, data=UNMissions, dist='lognormal')
## Call:
## survreg(formula = Surv(Duration, 1 - Censored) ~ InterState, 
##     data = UNMissions, dist = "lognormal")
## 
## Coefficients:
## (Intercept)  InterState 
##       3.408       1.580 
## 
## Scale= 1.39 
## 
## Loglik(model)= -197.6   Loglik(intercept only)= -201.8
##  Chisq= 8.37 on 1 degrees of freedom, p= 0.0038 
## n= 54

We want to see which model is more worthwhile, so our null hypothesis is that the second (original) model is better, and the alternative hypothesis is that the first model is better.

To perform the likelihood ratio test:

2*(-151.1--197.6) #Calcuate how much better the model is with additional covariates
## [1] 93
1-pchisq(93,3) #calculate p-value
## [1] 0

The p-value is 0, so we reject the null hypothesis. There is strong evidence that adding covariates improves the quality of the model.

Question 3

To plot both survival curves, we would need to assign coefficients of lexp, Deaths and Troops equal values for each curve.

Question 4

To find the mean values of each variable:

mean(na.omit(lexp))
## [1] 18.55
mean(na.omit(UNMissions$Deaths))
## [1] 34.17
mean(na.omit(UNMissions$Troops))
## [1] 4584

To graph the survival curves, using the means as our set values:

curve(1-plnorm(x, -4.32 + .436*18.54855 + .014*34.16667 + -.000142*4583.822, 1.39), 0, 600)
curve(1-plnorm(x, -4.32+.559 + .436*18.54855 + .014*34.16667 + -.000142*4583.822, 1.39), 0, 600, add=T, col='red')

plot of chunk unnamed-chunk-9

Question 5

There is less space between the curves in the graph from (4), which makes sense when considering that the p-value found in (2) indicated that a model with more covariates would be more effective.

Question 6

To fit the models:

modw=survreg(Surv(Duration, 1-Censored)~CivilWar, dist='weibull', data=UNMissions)
modl=survreg(Surv(Duration, 1-Censored)~CivilWar, dist='lognormal', data=UNMissions)
summary(modw)
## 
## Call:
## survreg(formula = Surv(Duration, 1 - Censored) ~ CivilWar, data = UNMissions, 
##     dist = "weibull")
##             Value Std. Error     z        p
## (Intercept)  4.82      0.268 18.00 1.98e-72
## CivilWar    -1.63      0.483 -3.37 7.55e-04
## Log(scale)   0.33      0.124  2.66 7.74e-03
## 
## Scale= 1.39 
## 
## Weibull distribution
## Loglik(model)= -205.8   Loglik(intercept only)= -210
##  Chisq= 8.35 on 1 degrees of freedom, p= 0.0039 
## Number of Newton-Raphson Iterations: 5 
## n= 54

The p-value of the Log(scale) parameter (.33) indicates that there is not enough evidence to reject the Log-Normal model in favor of the Weibull model.

summary(modl)
## 
## Call:
## survreg(formula = Surv(Duration, 1 - Censored) ~ CivilWar, data = UNMissions, 
##     dist = "lognormal")
##              Value Std. Error     z        p
## (Intercept)  3.943      0.248 15.92 4.58e-57
## CivilWar    -0.928      0.471 -1.97 4.89e-02
## Log(scale)   0.382      0.119  3.21 1.34e-03
## 
## Scale= 1.47 
## 
## Log Normal distribution
## Loglik(model)= -199.9   Loglik(intercept only)= -201.8
##  Chisq= 3.75 on 1 degrees of freedom, p= 0.053 
## Number of Newton-Raphson Iterations: 3 
## n= 54

The coefficient on \(CivilWar\) in the log-normal model (-.928) indicates that missions in response to a civil war are almost one unit shorter than those that are not.

CoxSnell = function(cs,status,xlim=NULL,ylim=NULL)  {
kmcs = survfit(Surv(jitter(cs,amount=(max(cs)-min(cs))/1000),status)~1)$surv
plot(log(-log(kmcs))~sort(log(cs)),xlab="log(Cox-Snell)",ylab="log(-log(S(Cox-Snell)))",xlim=xlim,ylim=ylim)
abline(0,1,col='red')
}

To check the adequacy of the Weibull model:

survreg(Surv(Duration, 1-Censored)~CivilWar, dist='weibull', data=UNMissions)
## Call:
## survreg(formula = Surv(Duration, 1 - Censored) ~ CivilWar, data = UNMissions, 
##     dist = "weibull")
## 
## Coefficients:
## (Intercept)    CivilWar 
##       4.823      -1.627 
## 
## Scale= 1.39 
## 
## Loglik(model)= -205.8   Loglik(intercept only)= -210
##  Chisq= 8.35 on 1 degrees of freedom, p= 0.0039 
## n= 54
CS = -log( 1 - pweibull(UNMissions$Duration , shape=1/1.390 , scale=exp(4.823301-1.627310*UNMissions$CivilWar ) ) )
CoxSnell(CS, UNMissions$Censored)

plot of chunk unnamed-chunk-14

The Weibull model does not appear at all appropriate.

To check the adequacy of the log-normal model:

survreg(Surv(Duration, 1-Censored)~CivilWar, dist='lognormal', data=UNMissions)
## Call:
## survreg(formula = Surv(Duration, 1 - Censored) ~ CivilWar, data = UNMissions, 
##     dist = "lognormal")
## 
## Coefficients:
## (Intercept)    CivilWar 
##      3.9427     -0.9284 
## 
## Scale= 1.466 
## 
## Loglik(model)= -199.9   Loglik(intercept only)= -201.8
##  Chisq= 3.75 on 1 degrees of freedom, p= 0.053 
## n= 54
CS = -log( 1 - plnorm(UNMissions$Duration , 3.9426907-0.9284464*UNMissions$CivilWar , 1.304805 ) )
CoxSnell(CS, UNMissions$Censored)

plot of chunk unnamed-chunk-15

The Log-Normal model also does not appear to be appropriate, but does look to be slightly more accurate than the Weibull model.

d. To calculate the AIC value for the Weibull model:

2*(3-205.8) #2*(number of parameters in model - log-likelihood of model)
## [1] -405.6

To calculate the AIC value for the Log-Normal model:

2*(3-199.9)
## [1] -393.8

The AIC values lead us to the same conclusion that we found in (c): while neither of the models are particularly appropriate, the Log-Normal model seems to perform slightly better than the Weibull model.

  1. Likelihood ratio tests are only appropriate for models that are nested, so we cannot use one to compare the Weibull and log-normal models.

Question 7

  1. The Weibull and Exponential AFT models have parameters that must be positive, and using a log scale ensures this. Normal models do not have this limitation.

  2. #Explain Cox-Snell process(lecture 9)