# Example: fitting a multiple linear regression.
# Models the murder rate across US states as a function of socio-economic
# and other indicators: population size, illiteracy, income level, and the
# number of cold (frost) days.

# Keep only the response (Murder) and the four predictors used by the model.
# The selected columns of state.x77 are converted to a data frame so they
# can be passed to lm() via `data =`.
states <- as.data.frame(
  state.x77[, c("Murder", "Population", "Illiteracy", "Income", "Frost")]
)

# Pairwise correlations between the response and the predictors.
cor(states)
## Murder Population Illiteracy Income Frost
## Murder 1.0000000 0.3436428 0.7029752 -0.2300776 -0.5388834
## Population 0.3436428 1.0000000 0.1076224 0.2082276 -0.3321525
## Illiteracy 0.7029752 0.1076224 1.0000000 -0.4370752 -0.6719470
## Income -0.2300776 0.2082276 -0.4370752 1.0000000 0.2262822
## Frost -0.5388834 -0.3321525 -0.6719470 0.2262822 1.0000000
library(car)
## Loading required package: carData
# Scatterplot matrix of every pair of variables in `states`, for a first
# visual look at how the response and predictors relate to each other.
scatterplotMatrix(
  states,
  lty = 2,
  main = "Соотношение переменных"
)

# Fit the model: Murder regressed on all four predictors.
fit <- lm(
  Murder ~ Population + Illiteracy + Income + Frost,
  data = states
)

# Residual summary, coefficient table, R-squared and the overall F-test.
summary(fit)
##
## Call:
## lm(formula = Murder ~ Population + Illiteracy + Income + Frost,
## data = states)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7960 -1.6495 -0.0811 1.4815 7.6210
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.235e+00 3.866e+00 0.319 0.7510
## Population 2.237e-04 9.052e-05 2.471 0.0173 *
## Illiteracy 4.143e+00 8.744e-01 4.738 2.19e-05 ***
## Income 6.442e-05 6.837e-04 0.094 0.9253
## Frost 5.813e-04 1.005e-02 0.058 0.9541
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.535 on 45 degrees of freedom
## Multiple R-squared: 0.567, Adjusted R-squared: 0.5285
## F-statistic: 14.73 on 4 and 45 DF, p-value: 9.133e-08
# 95% confidence intervals for the regression coefficients
# (the level is spelled out; 0.95 is also the default).
confint(fit, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -6.552191e+00 9.0213182149
## Population 4.136397e-05 0.0004059867
## Illiteracy 2.381799e+00 5.9038743192
## Income -1.312611e-03 0.0014414600
## Frost -1.966781e-02 0.0208304170
# Normality of residuals: Q-Q plot for the fitted model, with points
# labelled by state name and a simulated confidence envelope.
qqPlot(
  fit,
  labels = row.names(states),
  simulate = TRUE,
  main = "График Q-Q"
)

## Nevada Rhode Island
## 28 39
# Independence of errors: Durbin-Watson test for autocorrelated residuals.
# NOTE(review): the p-value is presumably estimated by resampling, so it may
# differ slightly between runs -- confirm against the car documentation.
durbinWatsonTest(fit)
## lag Autocorrelation D-W Statistic p-value
## 1 -0.2006929 2.317691 0.244
## Alternative hypothesis: rho != 0
# Linearity: component + residual (partial residual) plots for the model.
crPlots(fit)

# Homoscedasticity: score test for non-constant error variance.
ncvTest(fit)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 1.746514, Df = 1, p = 0.18632
# Spread-level plot; it also reports a suggested power transformation.
spreadLevelPlot(fit)

##
## Suggested power transformation: 1.209626
library(gvlma)
# Global validation of the linear model assumptions: a global statistic plus
# skewness, kurtosis, link-function and heteroscedasticity checks.
summary(
  gvlma(fit)
)
##
## Call:
## lm(formula = Murder ~ Population + Illiteracy + Income + Frost,
## data = states)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7960 -1.6495 -0.0811 1.4815 7.6210
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.235e+00 3.866e+00 0.319 0.7510
## Population 2.237e-04 9.052e-05 2.471 0.0173 *
## Illiteracy 4.143e+00 8.744e-01 4.738 2.19e-05 ***
## Income 6.442e-05 6.837e-04 0.094 0.9253
## Frost 5.813e-04 1.005e-02 0.058 0.9541
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.535 on 45 degrees of freedom
## Multiple R-squared: 0.567, Adjusted R-squared: 0.5285
## F-statistic: 14.73 on 4 and 45 DF, p-value: 9.133e-08
##
##
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance = 0.05
##
## Call:
## gvlma(x = fit)
##
## Value p-value Decision
## Global Stat 2.7728 0.5965 Assumptions acceptable.
## Skewness 1.5374 0.2150 Assumptions acceptable.
## Kurtosis 0.6376 0.4246 Assumptions acceptable.
## Link Function 0.1154 0.7341 Assumptions acceptable.
## Heteroscedasticity 0.4824 0.4873 Assumptions acceptable.
# Multicollinearity check: flag predictors whose sqrt(VIF) exceeds 2.
sqrt(vif(fit)) > 2
## Population Illiteracy Income Frost
## FALSE FALSE FALSE FALSE