library(logspline)
## Warning: package 'logspline' was built under R version 3.5.2
set.seed(1234)
x <- rexp(1000)
hist(x)
The summary function tells you how many knots is best, in the sense that the AIC is minimized.
m <- logspline(x)
summary(m) #summary.logspline() works as well
## knots A(1)/D(2) loglik AIC minimum penalty maximum penalty
## 3 2 -1004.48 2022.77 5.29 Inf
## 4 2 -1002.65 2026.03 NA NA
## 5 2 -999.19 2026.01 1.41 5.29
## 6 2 -998.74 2032.02 NA NA
## 7 2 -997.90 2037.25 NA NA
## 8 2 -997.42 2043.19 NA NA
## 9 2 -996.40 2048.07 NA NA
## 10 2 -996.39 2054.96 NA NA
## 11 2 -995.26 2059.61 NA NA
## 12 2 -994.26 2064.51 1.08 1.41
## 13 2 -994.18 2071.25 NA NA
## 14 2 -993.34 2076.49 NA NA
## 15 2 -992.64 2081.98 0.29 1.08
## 16 2 -992.49 2088.60 0.01 0.29
## 17 1 -992.49 2095.50 0.00 0.01
## the present optimal number of knots is 3
## penalty(AIC) was the default: BIC=log(samplesize): log( 1000 )= 6.91
Plotting the density estimator.
par(mfrow=c(2,2))
plot.logspline(m, n=1000, what='d', main="pdf") #pdf
plot.logspline(m, n=1000, what='p', main="cdf") #cdf
plot.logspline(m, n=1000, what='s', main="survival") #survival function
plot.logspline(m, n=1000, what='h', main="hazard") #hazard function
Analysis of the density estimator
dlogspline(c(1, 3), m) #probability of a given x (density at x)
## [1] 0.36869981 0.04973915
plogspline(c(1, 3), m) #cumulative probability (area under curve)
## [1] 0.6318874 0.9503401
qlogspline(c(0.05, 0.95), m) #quantile or CI estimation
## [1] 0.05343683 2.99318635
This example uses a transformation to find the peaks.
ls <- logspline(data)
data2 <- log(data)
ls2 <- logspline(data2)
par(mfrow=c(1,2))
plot(ls, main='density of x')
plot(ls2, main= 'density of log(x)')
To find the values of the peaks, you can use a density estimator.
dlogspline(seq(from=3, to=4, .1), ls2) #probability of a given x (density at x)
## [1] 0.01629774 0.01877530 0.02073342 0.02189277 0.02225280 0.02193923
## [7] 0.02114024 0.02006086 0.01889029 0.01778592 0.01687182
exp(3.4) #peak at 30 seconds
## [1] 29.9641
set.seed(1234)
x <- rexp(1000) + rnorm(1000, 0, 1)
hist(x)
You can model this as skewed normal, or go non-parametric.
ls3 <- logspline(x)
summary(ls3)
## knots A(1)/D(2) loglik AIC minimum penalty maximum penalty
## 3 2 -1718.36 3450.54 3.65 Inf
## 4 2 -1716.54 3453.80 2.24 3.65
## 5 2 -1716.51 3460.66 NA NA
## 6 2 -1714.76 3464.07 NA NA
## 7 2 -1714.23 3469.91 NA NA
## 8 2 -1712.05 3472.46 1.94 2.24
## 9 2 -1711.08 3477.43 0.56 1.94
## 10 2 -1710.80 3483.77 0.53 0.56
## 11 2 -1710.67 3490.41 NA NA
## 12 2 -1710.38 3496.74 NA NA
## 13 2 -1710.01 3502.92 0.43 0.53
## 14 2 -1709.80 3509.39 0.18 0.43
## 15 2 -1709.70 3516.12 0.07 0.18
## 16 1 -1709.67 3522.96 0.00 0.07
## the present optimal number of knots is 3
## penalty(AIC) was the default: BIC=log(samplesize): log( 1000 )= 6.91
par(mfrow=c(1, 2))
hist(x)
plot.logspline(ls3)