library(MCMCpack)
library(ggplot2)
library(GGally)
h <- swiss
str(h)
## 'data.frame': 47 obs. of 6 variables:
## $ Fertility : num 80.2 83.1 92.5 85.8 76.9 76.1 83.8 92.4 82.4 82.9 ...
## $ Agriculture : num 17 45.1 39.7 36.5 43.5 35.3 70.2 67.8 53.3 45.2 ...
## $ Examination : int 15 6 5 12 17 9 16 14 12 16 ...
## $ Education : int 12 9 5 7 15 7 7 8 7 13 ...
## $ Catholic : num 9.96 84.84 93.4 33.77 5.16 ...
## $ Infant.Mortality: num 22.2 22.2 20.2 20.3 20.6 26.6 23.6 24.9 21 24.4 ...
theme_set(theme_bw())
# plotmatrix(h)
ggpairs(h)
formula <- Fertility ~ Agriculture + Examination + Catholic + Education
Обычный МНК:
m1 <- lm(formula, data = h)
summary(m1)
##
## Call:
## lm(formula = formula, data = h)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.781 -6.331 0.811 5.721 15.557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91.0554 6.9488 13.10 < 2e-16 ***
## Agriculture -0.2206 0.0736 -3.00 0.0046 **
## Examination -0.2606 0.2741 -0.95 0.3472
## Catholic 0.1244 0.0373 3.34 0.0018 **
## Education -0.9616 0.1945 -4.94 1.3e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.74 on 42 degrees of freedom
## Multiple R-squared: 0.65, Adjusted R-squared: 0.616
## F-statistic: 19.5 on 4 and 42 DF, p-value: 3.95e-09
priors <- c(2, 0.8, 0.5, 0.3, -5)
m1.mcmc <- MCMCregress(formula, data = h, b0 = priors, B0 = 1e-04)
summary(m1.mcmc)
##
## Iterations = 1001:11000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## (Intercept) 90.593 7.0427 0.070427 0.070427
## Agriculture -0.216 0.0744 0.000744 0.000724
## Examination -0.248 0.2814 0.002814 0.002861
## Catholic 0.125 0.0378 0.000378 0.000378
## Education -0.963 0.2003 0.002003 0.002003
## sigma2 62.784 14.4123 0.144123 0.162921
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## (Intercept) 76.8020 85.932 90.597 95.1560 104.5796
## Agriculture -0.3634 -0.265 -0.216 -0.1673 -0.0721
## Examination -0.8027 -0.434 -0.248 -0.0611 0.2991
## Catholic 0.0499 0.100 0.125 0.1505 0.1983
## Education -1.3583 -1.095 -0.964 -0.8290 -0.5647
## sigma2 40.5154 52.492 60.745 70.6544 96.9017
str(m1.mcmc)
## mcmc [1:10000, 1:6] 101.4 91 80.8 100.3 100.3 ...
## - attr(*, "mcpar")= num [1:3] 1001 11000 1
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:6] "(Intercept)" "Agriculture" "Examination" "Catholic" ...
## - attr(*, "title")= chr "MCMCregress Posterior Sample"
## - attr(*, "y")= num [1:47, 1] 80.2 83.1 92.5 85.8 76.9 76.1 83.8 92.4 82.4 82.9 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:47] "Courtelary" "Delemont" "Franches-Mnt" "Moutier" ...
## .. ..$ : NULL
## - attr(*, "call")= language MCMCregress(formula = formula, data = h, b0 = priors, B0 = 1e-04)
beta.sample <- as.data.frame(m1.mcmc)
beta.sample$t <- 1:nrow(beta.sample) # добавим номер элемента последовательности
str(beta.sample)
## 'data.frame': 10000 obs. of 7 variables:
## $ (Intercept): num 101.4 91 80.8 100.3 100.3 ...
## $ Agriculture: num -0.36 -0.237 -0.108 -0.25 -0.353 ...
## $ Examination: num -0.377 -0.212 0.04 -0.628 -0.268 ...
## $ Catholic : num 0.122 0.151 0.122 0.117 0.149 ...
## $ Education : num -1.015 -0.951 -1.064 -0.992 -1.232 ...
## $ sigma2 : num 86.8 60.3 40.4 42 45.5 ...
## $ t : int 1 2 3 4 5 6 7 8 9 10 ...
Зададим удобные имена переменным
colnames(beta.sample)[1] <- "const"
Построим апостериорное распределение свободного члена и визуально оценим сходимость:
ggplot(beta.sample, aes(x = const)) + geom_density()
qplot(t, Agriculture, data = beta.sample)
Оценим модель с другим априорным мнением о коэффициентах:
priors <- c(0.2, 0.1, 0.2, -0.3, -0.1)
m1.mcmc <- MCMCregress(formula, data = h, b0 = priors, B0 = 1e-04)
summary(m1.mcmc)
##
## Iterations = 1001:11000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## (Intercept) 90.584 7.0428 0.070428 0.070428
## Agriculture -0.216 0.0744 0.000744 0.000724
## Examination -0.248 0.2814 0.002814 0.002861
## Catholic 0.125 0.0378 0.000378 0.000378
## Education -0.963 0.2003 0.002003 0.002003
## sigma2 62.784 14.4124 0.144124 0.162922
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## (Intercept) 76.7932 85.922 90.588 95.1461 104.572
## Agriculture -0.3634 -0.265 -0.216 -0.1672 -0.072
## Examination -0.8025 -0.434 -0.247 -0.0609 0.299
## Catholic 0.0499 0.100 0.125 0.1505 0.198
## Education -1.3583 -1.095 -0.964 -0.8289 -0.565
## sigma2 40.5140 52.490 60.744 70.6568 96.899