#Пример расчет множественной регрессии
#Пример - зависимость уровня убийств по штатам (на примере США) в зависимости от социально-экономических и прочих показателей (численность населения, грамотность, уровень доходов и количество холодных дней).
states<-data.frame(state.x77)
states <- as.data.frame(state.x77[,c("Murder", "Population", "Illiteracy", "Income", "Frost")])
cor(states)
##                Murder Population Illiteracy     Income      Frost
## Murder      1.0000000  0.3436428  0.7029752 -0.2300776 -0.5388834
## Population  0.3436428  1.0000000  0.1076224  0.2082276 -0.3321525
## Illiteracy  0.7029752  0.1076224  1.0000000 -0.4370752 -0.6719470
## Income     -0.2300776  0.2082276 -0.4370752  1.0000000  0.2262822
## Frost      -0.5388834 -0.3321525 -0.6719470  0.2262822  1.0000000
library(car)
## Loading required package: carData
scatterplotMatrix(states, lty = 2, main ='Соотношение переменных')

#Рассчитаем модель

fit <- lm(Murder ~ Population + Illiteracy + Income + Frost, data = states)
summary(fit)
## 
## Call:
## lm(formula = Murder ~ Population + Illiteracy + Income + Frost, 
##     data = states)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7960 -1.6495 -0.0811  1.4815  7.6210 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.235e+00  3.866e+00   0.319   0.7510    
## Population  2.237e-04  9.052e-05   2.471   0.0173 *  
## Illiteracy  4.143e+00  8.744e-01   4.738 2.19e-05 ***
## Income      6.442e-05  6.837e-04   0.094   0.9253    
## Frost       5.813e-04  1.005e-02   0.058   0.9541    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.535 on 45 degrees of freedom
## Multiple R-squared:  0.567,  Adjusted R-squared:  0.5285 
## F-statistic: 14.73 on 4 and 45 DF,  p-value: 9.133e-08
confint(fit)
##                     2.5 %       97.5 %
## (Intercept) -6.552191e+00 9.0213182149
## Population   4.136397e-05 0.0004059867
## Illiteracy   2.381799e+00 5.9038743192
## Income      -1.312611e-03 0.0014414600
## Frost       -1.966781e-02 0.0208304170
qqPlot(fit, labels = row.names(states), simulate = TRUE, main = 'График Q-Q')

##       Nevada Rhode Island 
##           28           39
durbinWatsonTest((fit))
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.2006929      2.317691   0.244
##  Alternative hypothesis: rho != 0
crPlots((fit))

ncvTest((fit))
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 1.746514, Df = 1, p = 0.18632
spreadLevelPlot(fit)

## 
## Suggested power transformation:  1.209626
library(gvlma)
summary(gvlma(fit))
## 
## Call:
## lm(formula = Murder ~ Population + Illiteracy + Income + Frost, 
##     data = states)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7960 -1.6495 -0.0811  1.4815  7.6210 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.235e+00  3.866e+00   0.319   0.7510    
## Population  2.237e-04  9.052e-05   2.471   0.0173 *  
## Illiteracy  4.143e+00  8.744e-01   4.738 2.19e-05 ***
## Income      6.442e-05  6.837e-04   0.094   0.9253    
## Frost       5.813e-04  1.005e-02   0.058   0.9541    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.535 on 45 degrees of freedom
## Multiple R-squared:  0.567,  Adjusted R-squared:  0.5285 
## F-statistic: 14.73 on 4 and 45 DF,  p-value: 9.133e-08
## 
## 
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance =  0.05 
## 
## Call:
##  gvlma(x = fit) 
## 
##                     Value p-value                Decision
## Global Stat        2.7728  0.5965 Assumptions acceptable.
## Skewness           1.5374  0.2150 Assumptions acceptable.
## Kurtosis           0.6376  0.4246 Assumptions acceptable.
## Link Function      0.1154  0.7341 Assumptions acceptable.
## Heteroscedasticity 0.4824  0.4873 Assumptions acceptable.
sqrt(vif(fit))>2
## Population Illiteracy     Income      Frost 
##      FALSE      FALSE      FALSE      FALSE