1. How to know the hazard rate function (hrf) shape of your data?

The best tool for determining the hrf shape of any data is the total time on test (TTT) plot.

Data 1: Constant hrf Shape

library(AdequacyModel)
# A constant hazard rate corresponds to exponentially distributed lifetimes,
# whose scaled TTT curve follows the diagonal. Ten identical observations are
# degenerate instead: by the TTT definition every scaled value equals 1, which
# says nothing about the hazard shape. Exponential quantiles at evenly spaced
# probabilities keep the example deterministic (no random seed needed).
y <- qexp(ppoints(10))
TTT(y, col = "blue", lwd = 2.5, grid = TRUE, lty = 2)

library(AmoudSurv)

## Warning: package 'AmoudSurv' was built under R version 4.1.3

# Load the alloauto data set and keep its time column for plotting.
data("alloauto")
time <- alloauto$time

# Dashed green TTT curve of the alloauto times, drawn over a grid.
TTT(
  time,
  col = "green",
  lwd = 2.5,
  lty = 2,
  grid = TRUE
)

Data 2: Increasing hrf Shape

# Synthetic example: the integers 1 to 100.
x <- seq_len(100)
TTT(
  x,
  col = "red",
  lwd = 2.5,
  lty = 2,
  grid = TRUE
)

# Real-data example: the carbone data set.
data("carbone")
TTT(
  carbone,
  col = "green",
  lwd = 2.5,
  lty = 2,
  grid = TRUE
)

Data 3: Decreasing hrf Shape

# The TTT transform is built from the order statistics of the sample, so
# reversing the order of x (sort(x, decreasing = TRUE)) yields exactly the
# same plot as x itself and cannot illustrate a decreasing hazard.
# A Weibull distribution with shape < 1 has a decreasing hazard rate; taking
# its quantiles at evenly spaced probabilities gives a deterministic sample
# whose TTT curve lies below the diagonal (convex).
z <- qweibull(ppoints(100), shape = 0.5)
TTT(z, col = "green", lwd = 2.5, grid = TRUE, lty = 2)

Data 4: Non-Monotonic

library(AmoudSurv)
# Load the gastric data set and keep its time column in time1.
data("gastric")
time1 <- gastric$time

# Plot time1 (gastric). The variable `time` still holds the alloauto times
# from the first example, so plotting it here would repeat that earlier curve.
TTT(time1, col = "blue", lwd = 2.5, grid = TRUE, lty = 2)

How to combine the three plots in one figure

# Draw three TTT plots side by side in a 1 x 3 layout. par() returns the
# previous settings, which are restored afterwards so that later plots are not
# squeezed into the same three-panel layout.
old_par <- par(mfrow = c(1, 3))
TTT(time, col = "green", lwd = 2.5, grid = TRUE, lty = 2)     # alloauto times
TTT(carbone, col = "yellow", lwd = 2.5, grid = TRUE, lty = 2) # carbone data
# Third panel uses time1 (gastric); `time` would duplicate the first panel.
TTT(time1, col = "blue", lwd = 2.5, grid = TRUE, lty = 2)
par(old_par)

2. Exponential Distribution

library(AdequacyModel)

# Exponential CDF: F(x) = 1 - exp(-lambda * x).
#   par: parameter vector; par[1] is the rate lambda.
#   x:   numeric vector of points at which to evaluate the CDF.
# Returns a numeric vector the same length as x.
cdfe <- function(par, x) {
  rate <- par[1]
  survival <- exp(-rate * x)
  1 - survival
}

# Exponential PDF: f(x) = lambda * exp(-lambda * x).
#   par: parameter vector; par[1] is the rate lambda.
#   x:   numeric vector of points at which to evaluate the density.
# Returns a numeric vector the same length as x.
pdfe <- function(par, x) {
  rate <- par[1]
  decay <- exp(-rate * x)
  rate * decay
}

# Data set to be modelled.
data("carbone")

# Fit the one-parameter exponential model to carbone. The optimiser starts at
# lambda = 1 and uses the BFGS method; mle = NULL means no parameter estimates
# are supplied up front.
goodness.fit(
  pdf = pdfe,
  cdf = cdfe,
  starts = c(1),
  data = carbone,
  method = "BFGS",
  domain = c(0, Inf),
  mle = NULL
)

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in ks.test(x = data, y = "cdf", par = as.vector(parameters)): ties
## should not be present for the Kolmogorov-Smirnov test

## $W
## [1] 0.1493349
## 
## $A
## [1] 0.7643187
## 
## $KS
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  data
## D = 0.32059, p-value = 2.364e-09
## alternative hypothesis: two-sided
## 
## 
## $mle
## [1] 0.3814764
## 
## $AIC
## [1] 394.7417
## 
## $`CAIC `
## [1] 394.7825
## 
## $BIC
## [1] 397.3469
## 
## $HQIC
## [1] 395.7961
## 
## $Erro
## [1] 0.03814738
## 
## $Value
## [1] 196.3709
## 
## $Convergence
## [1] 0

3. Weibull Distribution

library(AdequacyModel)

# Weibull CDF: F(x) = 1 - exp(-(alpha * x)^beta).
#   par: parameter vector; par[1] is alpha and par[2] is the shape beta.
#   x:   numeric vector of points at which to evaluate the CDF.
# Returns a numeric vector the same length as x.
#
# The power must apply to (alpha * x) inside the exponential. The previous
# form, exp(-alpha * x)^beta, equals exp(-alpha * beta * x), which is an
# exponential CDF and is not the integral of the density in pdfw.
# NOTE(review): W, A and KS statistics recorded with the previous formula
# should be regenerated.
cdfw <- function(par, x) {
  alpha <- par[1]
  beta <- par[2]
  1 - exp(-(alpha * x)^beta)
}

# Weibull PDF:
#   f(x) = alpha * beta * (alpha * x)^(beta - 1) * exp(-(alpha * x)^beta).
#   par: parameter vector; par[1] is alpha and par[2] is the shape beta.
#   x:   numeric vector of points at which to evaluate the density.
# Returns a numeric vector the same length as x.
pdfw <- function(par, x) {
  alpha <- par[1]
  beta <- par[2]
  scaled <- alpha * x
  alpha * beta * scaled^(beta - 1) * exp(-(scaled^beta))
}

# Data set to be modelled.
data("carbone")

# Fit the two-parameter Weibull model to carbone. The optimiser starts at
# (alpha, beta) = (1, 1) and uses the BFGS method; mle = NULL means no
# parameter estimates are supplied up front.
goodness.fit(
  pdf = pdfw,
  cdf = cdfw,
  starts = c(1, 1),
  data = carbone,
  method = "BFGS",
  domain = c(0, Inf),
  mle = NULL
)

## Warning in ks.test(x = data, y = "cdf", par = as.vector(parameters)): ties
## should not be present for the Kolmogorov-Smirnov test

## $W
## [1] 0.1333968
## 
## $A
## [1] 0.6811538
## 
## $KS
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  data
## D = 0.64453, p-value < 2.2e-16
## alternative hypothesis: two-sided
## 
## 
## $mle
## [1] 0.3397091 2.7928603
## 
## $AIC
## [1] 287.0586
## 
## $`CAIC `
## [1] 287.1823
## 
## $BIC
## [1] 292.2689
## 
## $HQIC
## [1] 289.1673
## 
## $Erro
## [1] 0.01282196 0.21409886
## 
## $Value
## [1] 141.5293
## 
## $Convergence
## [1] 0

4. Gompertz Distribution

library(AdequacyModel)

# Gompertz CDF: F(x) = 1 - exp(-eta * (exp(b * x) - 1)).
#   par: parameter vector; par[1] is eta and par[2] is b.
#   x:   numeric vector of points at which to evaluate the CDF.
# Returns a numeric vector the same length as x.
cdfg <- function(par, x) {
  eta <- par[1]
  b <- par[2]
  cum_hazard <- eta * (exp(b * x) - 1)
  1 - exp(-cum_hazard)
}

# Gompertz PDF: f(x) = b * eta * exp(eta + b * x - eta * exp(b * x)).
#   par: parameter vector; par[1] is eta and par[2] is b.
#   x:   numeric vector of points at which to evaluate the density.
# Returns a numeric vector the same length as x.
pdfg <- function(par, x) {
  eta <- par[1]
  b <- par[2]
  bx <- b * x
  exponent <- eta + bx - eta * exp(bx)
  b * eta * exp(exponent)
}

# Data set to be modelled.
data("carbone")

# Fit the two-parameter Gompertz model to carbone. The optimiser starts at
# (eta, b) = (1, 1) and uses the BFGS method; mle = NULL means no parameter
# estimates are supplied up front.
goodness.fit(
  pdf = pdfg,
  cdf = cdfg,
  starts = c(1, 1),
  data = carbone,
  method = "BFGS",
  domain = c(0, Inf),
  mle = NULL
)

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in log(pdf(par, x)): NaNs produced

## Warning in ks.test(x = data, y = "cdf", par = as.vector(parameters)): ties
## should not be present for the Kolmogorov-Smirnov test

## $W
## [1] 0.1597538
## 
## $A
## [1] 1.260904
## 
## $KS
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  data
## D = 0.096206, p-value = 0.3129
## alternative hypothesis: two-sided
## 
## 
## $mle
## [1] 0.09726057 0.79101610
## 
## $AIC
## [1] 302.25
## 
## $`CAIC `
## [1] 302.3738
## 
## $BIC
## [1] 307.4604
## 
## $HQIC
## [1] 304.3588
## 
## $Erro
## [1] 0.03088534 0.07758541
## 
## $Value
## [1] 149.125
## 
## $Convergence
## [1] 0

Information (Selection) Criteria

Compare the information criteria (AIC, CAIC, BIC and HQIC) of the fitted models. In general, the model with the smallest values of AIC, CAIC, BIC and HQIC is the best.

        Exponential     Weibull     Gompertz

AIC: 394.7417; 287.0586; 302.2500
CAIC: 394.7825; 287.1823; 302.3738
BIC: 397.3469; 292.2689; 307.4604
HQIC: 395.7961; 289.1673; 304.3588

We select the Weibull distribution since it has the smallest value of every criterion.

Module 8: Applied Probability in R

Mohamoud Jama Ali

2023-01-12