dr.sc. Luka Šikić
15 prosinac, 2019
Dijagram rasipanja koji pokazuje “raspolozenje” kao funkciju “sati spavanja”.
Regresijski pravac koji prikazuje odnos “raspolozenja” i “sati spavanja”.
Regresijski pravac koji loše prikazuje odnos “raspolozenja” i “sati spavanja”.
\[ \hat{Y_i} = b_1 X_i + b_0 \]
\[ \epsilon_i = Y_i - \hat{Y}_i \]
\[ Y_i = b_1 X_i + b_0 + \epsilon_i \] - OLS model
\[ \sum_i (Y_i - \hat{Y}_i)^2 \]
\[ \sum_i {\epsilon_i}^2 \] - Grafički prikaz OLS modela
Prikaz reziduala vezanih uz regresijski pravac.
Prikaz reziduala vezanih uz “loš” regresijski pravac.
# Procjeni regresijski model i spremi rezultate u objekt
regression.1 <- lm( formula = dan.grump ~ dan.sleep,
data = parenthood ) ##
## Call:
## lm(formula = dan.grump ~ dan.sleep, data = parenthood)
##
## Coefficients:
## (Intercept) dan.sleep
## 125.956 -8.937
\[ \hat{Y}_i = -8.94 \ X_i + 125.96 \]
\[ Y_i = b_2 X_{i2} + b_1 X_{i1} + b_0 + \epsilon_i \]
dan.grump ~ dan.sleep + baby.sleep
# Provedi višestruku regresiju u R
regression.2 <- lm( formula = dan.grump ~ dan.sleep + baby.sleep,
data = parenthood )
# Pregledaj rezultate
print(regression.2)##
## Call:
## lm(formula = dan.grump ~ dan.sleep + baby.sleep, data = parenthood)
##
## Coefficients:
## (Intercept) dan.sleep baby.sleep
## 125.96557 -8.95025 0.01052
\[ Y_i = \left( \sum_{k=1}^K b_{k} X_{ik} \right) + b_0 + \epsilon_i \]
\[ \mbox{SS}_{res} = \sum_i (Y_i - \hat{Y}_i)^2 \]
\[ \mbox{SS}_{tot} = \sum_i (Y_i - \bar{Y})^2 \]
## [1] 1838.722
## [1] -399525.4
\[ R^2 = 1 - \frac{\mbox{SS}_{res}}{\mbox{SS}_{tot}} \] 5. Izračunaj vrijednost
## [1] 1.004602
## [1] 0.8161027
\[ \mbox{adj. } R^2 = 1 - \left(\frac{\mbox{SS}_{res}}{\mbox{SS}_{tot}} \times \frac{N-1}{N-K-1} \right) \]
\[ H_0: Y_i = b_0 + \epsilon_i \]
\[ H_1: Y_i = \left( \sum_{k=1}^K b_{k} X_{ik} \right) + b_0 + \epsilon_i \]
\[ \mbox{SS}_{mod} = \mbox{SS}_{tot} - \mbox{SS}_{res} \]
\[ \begin{array}{rcl} \mbox{MS}_{mod} &=& \displaystyle\frac{\mbox{SS}_{mod} }{df_{mod}} \\ \\ \mbox{MS}_{res} &=& \displaystyle\frac{\mbox{SS}_{res} }{df_{res} } \end{array} \]
\[ F = \frac{\mbox{MS}_{mod}}{\mbox{MS}_{res}} \]
\[ \begin{array}{rl} H_0: & b = 0 \\ H_1: & b \neq 0 \end{array} \]
\[ t = \frac{\hat{b}}{\mbox{SE}({\hat{b})}} \]
##
## Call:
## lm(formula = dan.grump ~ dan.sleep + baby.sleep, data = parenthood)
##
## Coefficients:
## (Intercept) dan.sleep baby.sleep
## 125.96557 -8.95025 0.01052
##
## Call:
## lm(formula = dan.grump ~ dan.sleep + baby.sleep, data = parenthood)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.0345 -2.2198 -0.4016 2.6775 11.7496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 125.96557 3.04095 41.423 <2e-16 ***
## dan.sleep -8.95025 0.55346 -16.172 <2e-16 ***
## baby.sleep 0.01052 0.27106 0.039 0.969
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.354 on 97 degrees of freedom
## Multiple R-squared: 0.8161, Adjusted R-squared: 0.8123
## F-statistic: 215.2 on 2 and 97 DF, p-value: < 2.2e-16
\[ D_i = \frac{{\epsilon_i^*}^2 }{K+1} \times \frac{h_i}{1-h_i} \]
# Provedi procjenu bez ekstremnih opservacija
lm(formula = dan.grump ~ dan.sleep + baby.sleep,
data = parenthood,
subset = -64)##
## Call:
## lm(formula = dan.grump ~ dan.sleep + baby.sleep, data = parenthood,
## subset = -64)
##
## Coefficients:
## (Intercept) dan.sleep baby.sleep
## 126.3553 -8.8283 -0.1319
# Prikaži reziduale na histogramu
hist( x = residuals(regression.2),
xlab = "Vrijednost reziduala",
main = "",
breaks = 20)# Spremi fit vrijednosti u objekt
yhat.2 <- fitted.values(object = regression.2)
# Prikaži grafički
plot(x = yhat.2,
y = parenthood$dan.grump,
xlab = "Fit",
ylab = "Observed")## Test stat Pr(>|Test stat|)
## dan.sleep 2.1604 0.03323 *
## baby.sleep -0.5445 0.58733
## Tukey test 2.1615 0.03066 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.09317511, Df = 1, p = 0.76018
## Loading required package: carData
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 125.965566 3.247285 38.7910 <2e-16 ***
## dan.sleep -8.950250 0.615820 -14.5339 <2e-16 ***
## baby.sleep 0.010524 0.291565 0.0361 0.9713
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\[ \mbox{VIF}_k = \frac{1}{1-{R^2_{(-k)}}} \]
## dan.sleep baby.sleep
## 1.651038 1.651038
## dan.sleep baby.sleep dan.grump day
## dan.sleep 1.00000000 0.62794934 -0.90338404 -0.09840768
## baby.sleep 0.62794934 1.00000000 -0.56596373 -0.01043394
## dan.grump -0.90338404 -0.56596373 1.00000000 0.07647926
## day -0.09840768 -0.01043394 0.07647926 1.00000000
\[ \mbox{AIC} = \displaystyle\frac{\mbox{SS}_{res}}{\hat{\sigma}}^2+ 2K \] 2. Selekcija unatrag
# Specificiraj puni model
full.model <- lm(formula = dan.grump ~ dan.sleep + baby.sleep + day,
data = parenthood)
# Selekcija unatrag
step(object = full.model,
direction = "backward")## Start: AIC=299.08
## dan.grump ~ dan.sleep + baby.sleep + day
##
## Df Sum of Sq RSS AIC
## - baby.sleep 1 0.1 1837.2 297.08
## - day 1 1.6 1838.7 297.16
## <none> 1837.1 299.08
## - dan.sleep 1 4909.0 6746.1 427.15
##
## Step: AIC=297.08
## dan.grump ~ dan.sleep + day
##
## Df Sum of Sq RSS AIC
## - day 1 1.6 1838.7 295.17
## <none> 1837.2 297.08
## - dan.sleep 1 8103.0 9940.1 463.92
##
## Step: AIC=295.17
## dan.grump ~ dan.sleep
##
## Df Sum of Sq RSS AIC
## <none> 1838.7 295.17
## - dan.sleep 1 8159.9 9998.6 462.50
##
## Call:
## lm(formula = dan.grump ~ dan.sleep, data = parenthood)
##
## Coefficients:
## (Intercept) dan.sleep
## 125.956 -8.937
# Procijeni osnovni model
nul.model <- lm(dan.grump ~ 1, parenthood)
# Definiraj selekcijsku funkciju (unaprijed)
step(object = nul.model,
direction = "forward",
scope = dan.grump ~ dan.sleep + baby.sleep + day)## Start: AIC=462.5
## dan.grump ~ 1
##
## Df Sum of Sq RSS AIC
## + dan.sleep 1 8159.9 1838.7 295.17
## + baby.sleep 1 3202.7 6795.9 425.89
## <none> 9998.6 462.50
## + day 1 58.5 9940.1 463.92
##
## Step: AIC=295.17
## dan.grump ~ dan.sleep
##
## Df Sum of Sq RSS AIC
## <none> 1838.7 295.17
## + day 1 1.55760 1837.2 297.08
## + baby.sleep 1 0.02858 1838.7 297.16
##
## Call:
## lm(formula = dan.grump ~ dan.sleep, data = parenthood)
##
## Coefficients:
## (Intercept) dan.sleep
## 125.956 -8.937
# Procjeni dva ugnježdena modela
M0 <- lm( dan.grump ~ dan.sleep + day, parenthood )
M1 <- lm( dan.grump ~ dan.sleep + day + baby.sleep, parenthood )
# Usporedi modele
AIC( M0, M1 )## df AIC
## M0 4 582.8681
## M1 5 584.8646
\[ F = \frac{(\mbox{SS}_{res}^{(0)} - \mbox{SS}_{res}^{(1)})/k}{(\mbox{SS}_{res}^{(1)})/(N-p-1)} \]
\[ \mbox{SS}_\Delta = \mbox{SS}_{res}^{(0)} - \mbox{SS}_{res}^{(1)} \]
\[ \mbox{SS}_\Delta = \sum_{i} \left( \hat{y}_i^{(1)} - \hat{y}_i^{(0)} \right)^2 \]
## Analysis of Variance Table
##
## Model 1: dan.grump ~ dan.sleep + day
## Model 2: dan.grump ~ dan.sleep + day + baby.sleep
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 97 1837.2
## 2 96 1837.1 1 0.063688 0.0033 0.9541