data("groundbeef", package = "fitdistrplus")
my_data <- groundbeef$serving
plot(my_data, pch=20)
hist(my_data)
Plots empirical density function. (Note the empirical density function is built from the CDF.)
plotdist(my_data, histo = TRUE, demp = TRUE)
You can basically use any distribution with the standard d/q/p/r functionality in R.
For continuous, use normal, log normal, exponential, gamma, uniform, beta, logis. For discrete, binomial, geometric, hypergeometric, poisson, or nbinom (multi?)
?fitdist
## starting httpd help server ... done
fit_w <- fitdist(my_data, "weibull")
fit_g <- fitdist(my_data, "gamma")
fit_ln <- fitdist(my_data, "lnorm")
summary(fit_ln)
## Fitting of the distribution ' lnorm ' by maximum likelihood
## Parameters :
## estimate Std. Error
## meanlog 4.1693701 0.03366988
## sdlog 0.5366095 0.02380783
## Loglikelihood: -1261.319 AIC: 2526.639 BIC: 2533.713
## Correlation matrix:
## meanlog sdlog
## meanlog 1 0
## sdlog 0 1
See which distribution fits the best using chi-square. Note that is uses the parameters esimated above. Snazzy!
gofstat(list(fit_w, fit_g, fit_ln), fitnames = c("Weilbull", "Gamma", "Lnorm"))
## Goodness-of-fit statistics
## Weilbull Gamma Lnorm
## Kolmogorov-Smirnov statistic 0.1396646 0.1281246 0.1493090
## Cramer-von Mises statistic 0.6840994 0.6934112 0.8277358
## Anderson-Darling statistic 3.5736460 3.5660192 4.5436542
##
## Goodness-of-fit criteria
## Weilbull Gamma Lnorm
## Akaike's Information Criterion 2514.449 2511.250 2526.639
## Bayesian Information Criterion 2521.524 2518.325 2533.713
Note the Kolmogorov-Smirnov is included here. For more information, try out ks.test.
par(mfrow=c(2,2))
plot.legend <- c("Weibull", "lognormal", "gamma")
denscomp(list(fit_w, fit_g, fit_ln), legendtext = plot.legend)
cdfcomp (list(fit_w, fit_g, fit_ln), legendtext = plot.legend)
qqcomp (list(fit_w, fit_g, fit_ln), legendtext = plot.legend)
ppcomp (list(fit_w, fit_g, fit_ln), legendtext = plot.legend)
descdist(my_data, discrete=FALSE, boot=500)
## summary statistics
## ------
## min: 10 max: 200
## median: 79
## mean: 73.64567
## estimated sd: 35.88487
## estimated skewness: 0.7352745
## estimated kurtosis: 3.551384
The fitting can work with other non-base distribution. It only needs that the correspodent, d, p, q functions are implemented.
data("endosulfan", package = "fitdistrplus")
my_data <- endosulfan$ATV
fit_ln <- fitdist(my_data, "lnorm")
cdfcomp(fit_ln, xlogscale = TRUE, ylogscale = TRUE)
Add other distributions in the library “actuar”
library(actuar)
##
## Attaching package: 'actuar'
## The following object is masked from 'package:grDevices':
##
## cm
fit_ll <- fitdist(my_data, "llogis", start = list(shape = 1, scale = 500))
fit_P <- fitdist(my_data, "pareto", start = list(shape = 1, scale = 500))
fit_B <- fitdist(my_data, "burr", start = list(shape1 = 0.3, shape2 = 1, rate = 1))
cdfcomp(list(fit_ln, fit_ll, fit_P, fit_B), xlogscale = TRUE, ylogscale = TRUE,
legendtext = c("lognormal", "loglogistic", "Pareto", "Burr"), lwd=2)
Check for goodness of fit. Remember a low chi-square statistic indicates that the distribution is a good fit.
gofstat(list(fit_ln, fit_ll, fit_P, fit_B), fitnames = c("lnorm", "llogis", "Pareto", "Burr"))
## Goodness-of-fit statistics
## lnorm llogis Pareto Burr
## Kolmogorov-Smirnov statistic 0.1672498 0.1195888 0.08488002 0.06154925
## Cramer-von Mises statistic 0.6373593 0.3827449 0.13926498 0.06803071
## Anderson-Darling statistic 3.4721179 2.8315975 0.89206283 0.52393018
##
## Goodness-of-fit criteria
## lnorm llogis Pareto Burr
## Akaike's Information Criterion 1068.810 1069.246 1048.112 1045.731
## Bayesian Information Criterion 1074.099 1074.535 1053.400 1053.664
You can bootstrap to estimate the uncertainty in the parameters
ests <- bootdist(fit_B, niter = 1e3)
summary(ests)
## Parametric bootstrap medians and 95% percentile CI
## Median 2.5% 97.5%
## shape1 0.2035533 0.09672861 0.3739276
## shape2 1.5786159 1.05231668 2.9645034
## rate 1.4508249 0.66844970 2.6631288
plot(ests)
quantile(ests, probs=.05)
## (original) estimated quantiles for each specified probability (non-censored data)
## p=0.05
## estimate 0.2939259
## Median of bootstrap estimates
## p=0.05
## estimate 0.3004415
##
## two-sided 95 % CI of each quantile
## p=0.05
## 2.5 % 0.1717223
## 97.5 % 0.5109727
Here’s a good one for normal distributions.
xn <- rnorm(n=100, mean=10, sd=5)
mean(xn); sd(xn)
## [1] 9.528916
## [1] 4.768952
fit_n <- fitdist(xn, "norm")
summary(fit_n)
## Fitting of the distribution ' norm ' by maximum likelihood
## Parameters :
## estimate Std. Error
## mean 9.528916 0.4745047
## sd 4.745047 0.3355255
## Loglikelihood: -297.604 AIC: 599.208 BIC: 604.4183
## Correlation matrix:
## mean sd
## mean 1.000000e+00 -1.131244e-09
## sd -1.131244e-09 1.000000e+00
plotdist(xn, "norm", para=list(mean=mean(xn), sd=sd(xn)), demp = TRUE)
ests <- bootdist(fit_n, niter = 1e3)
summary(ests)
## Parametric bootstrap medians and 95% percentile CI
## Median 2.5% 97.5%
## mean 9.508337 8.574283 10.526574
## sd 4.680614 4.065518 5.345391