Heimaverkefni 4 Hoft

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the assignment data; the first line of the file holds the column names.
dat <- read.table(file = "verkefni_4_2026_header.txt", header = TRUE)

a-liður

# Reshape to long format: every column whose name starts with "x" is stacked
# into name/value pairs (xvar, xvalue) so each predictor can get its own panel.
dat_long <- pivot_longer(
  dat,
  cols = starts_with("x"),
  names_to = "xvar",
  values_to = "xvalue"
)

# Scatter plot of Y against each predictor; panels have independent x-axes.
ggplot(data = dat_long, mapping = aes(x = xvalue, y = Y)) +
  geom_point(colour = "hotpink", size = 3) +
  facet_wrap(facets = ~xvar, scales = "free_x")

# Scatter-plot matrix of all columns in dat.
pairs(x = dat, main = "Pairs plot: Y og x1-x6")

b-liður

# Pairwise Pearson correlations between all columns of dat.
cor(x = dat, method = "pearson")
##             Y          x1          x2          x3         x4         x5
## Y   1.0000000  0.32306071 -0.24968780  0.69082860 -0.5574363 -0.3347743
## x1  0.3230607  1.00000000 -0.06432696  0.23521052 -0.0689409 -0.2190429
## x2 -0.2496878 -0.06432696  1.00000000  0.00307297  0.4462797  0.4699373
## x3  0.6908286  0.23521052  0.00307297  1.00000000 -0.2247722 -0.0404318
## x4 -0.5574363 -0.06894090  0.44627974 -0.22477219  1.0000000  0.4007609
## x5 -0.3347743 -0.21904291  0.46993729 -0.04043180  0.4007609  1.0000000
## x6 -0.2480903 -0.38948249  0.19276170 -0.21524481  0.3843357  0.4293733
##            x6
## Y  -0.2480903
## x1 -0.3894825
## x2  0.1927617
## x3 -0.2152448
## x4  0.3843357
## x5  0.4293733
## x6  1.0000000

c-liður

# Simple linear regression of Y on x1 alone.
lm1 <- lm(formula = Y ~ x1, data = dat)
summary(lm1)
## 
## Call:
## lm(formula = Y ~ x1, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9899 -0.9913 -0.1316  0.5795  2.5879 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   3.7677     4.5316   0.831    0.417
## x1            1.0972     0.7796   1.407    0.177
## 
## Residual standard error: 1.323 on 17 degrees of freedom
## Multiple R-squared:  0.1044, Adjusted R-squared:  0.05168 
## F-statistic: 1.981 on 1 and 17 DF,  p-value: 0.1773
# Simple linear regression of Y on x2 alone.
lm2 <- lm(formula = Y ~ x2, data = dat)
summary(lm2)
## 
## Call:
## lm(formula = Y ~ x2, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.05453 -1.18306  0.05259  1.03118  2.08841 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  17.0529     6.5175   2.616   0.0181 *
## x2           -0.3571     0.3358  -1.063   0.3026  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.354 on 17 degrees of freedom
## Multiple R-squared:  0.06234,    Adjusted R-squared:  0.007188 
## F-statistic:  1.13 on 1 and 17 DF,  p-value: 0.3026
# Simple linear regression of Y on x3 alone.
lm3 <- lm(formula = Y ~ x3, data = dat)
summary(lm3)
## 
## Call:
## lm(formula = Y ~ x3, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.94395 -0.72298 -0.06256  0.84675  1.64911 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -8.9710     4.8545  -1.848  0.08208 . 
## x3            0.5931     0.1505   3.940  0.00106 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.011 on 17 degrees of freedom
## Multiple R-squared:  0.4772, Adjusted R-squared:  0.4465 
## F-statistic: 15.52 on 1 and 17 DF,  p-value: 0.001057
# Simple linear regression of Y on x4 alone.
lm4 <- lm(formula = Y ~ x4, data = dat)
summary(lm4)
## 
## Call:
## lm(formula = Y ~ x4, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6415 -0.9401 -0.1711  0.8832  1.9009 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 24.60167    5.23368   4.701 0.000206 ***
## x4          -0.16344    0.05904  -2.768 0.013151 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.161 on 17 degrees of freedom
## Multiple R-squared:  0.3107, Adjusted R-squared:  0.2702 
## F-statistic: 7.664 on 1 and 17 DF,  p-value: 0.01315
# Simple linear regression of Y on x5 alone.
lm5 <- lm(formula = Y ~ x5, data = dat)
summary(lm5)
## 
## Call:
## lm(formula = Y ~ x5, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0608 -0.7866 -0.0608  1.0634  2.2392 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   19.475      6.386   3.050  0.00724 **
## x5            -4.483      3.060  -1.465  0.16122   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.317 on 17 degrees of freedom
## Multiple R-squared:  0.1121, Adjusted R-squared:  0.05984 
## F-statistic: 2.146 on 1 and 17 DF,  p-value: 0.1612
# Simple linear regression of Y on x6 alone.
lm6 <- lm(formula = Y ~ x6, data = dat)
summary(lm6)
## 
## Call:
## lm(formula = Y ~ x6, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1218 -0.9779 -0.4218  0.9236  2.4721 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   13.966      3.645   3.832  0.00134 **
## x6            -1.969      1.865  -1.056  0.30578   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.354 on 17 degrees of freedom
## Multiple R-squared:  0.06155,    Adjusted R-squared:  0.006346 
## F-statistic: 1.115 on 1 and 17 DF,  p-value: 0.3058

d-liður

# Two-predictor regression of Y on x1 and x2.
lmm1 <- lm(formula = Y ~ x1 + x2, data = dat)
summary(lmm1)
## 
## Call:
## lm(formula = Y ~ x1 + x2, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.95459 -1.01699  0.03796  0.73988  2.32005 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  10.4306     8.0618   1.294    0.214
## x1            1.0470     0.7812   1.340    0.199
## x2           -0.3287     0.3289  -0.999    0.333
## 
## Residual standard error: 1.323 on 16 degrees of freedom
## Multiple R-squared:  0.157,  Adjusted R-squared:  0.05161 
## F-statistic:  1.49 on 2 and 16 DF,  p-value: 0.2551
# Two-predictor regression of Y on x1 and x3.
lmm2 <- lm(formula = Y ~ x1 + x3, data = dat)
summary(lmm2)
## 
## Call:
## lm(formula = Y ~ x1 + x3, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.78382 -0.61681  0.02068  0.77631  1.39037 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -11.2138     5.4257  -2.067  0.05533 . 
## x1            0.5773     0.6149   0.939  0.36178   
## x3            0.5587     0.1554   3.595  0.00243 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.014 on 16 degrees of freedom
## Multiple R-squared:  0.5045, Adjusted R-squared:  0.4426 
## F-statistic: 8.147 on 2 and 16 DF,  p-value: 0.003632
# Two-predictor regression of Y on x1 and x4.
lmm3 <- lm(formula = Y ~ x1 + x4, data = dat)
summary(lmm3)
## 
## Call:
## lm(formula = Y ~ x1 + x4, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4527 -0.7209 -0.5300  0.9651  1.9104 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 18.45623    6.57971   2.805   0.0127 *
## x1           0.97132    0.66357   1.464   0.1626  
## x4          -0.15766    0.05728  -2.752   0.0142 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.123 on 16 degrees of freedom
## Multiple R-squared:  0.3921, Adjusted R-squared:  0.3162 
## F-statistic: 5.161 on 2 and 16 DF,  p-value: 0.01864
# Two-predictor regression of Y on x1 and x5.
lmm4 <- lm(formula = Y ~ x1 + x5, data = dat)
summary(lmm4)
## 
## Call:
## lm(formula = Y ~ x1 + x5, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.94430 -0.99476 -0.07294  0.87318  2.22706 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  12.7040     8.7236   1.456    0.165
## x1            0.8909     0.7892   1.129    0.276
## x5           -3.7135     3.1115  -1.193    0.250
## 
## Residual standard error: 1.307 on 16 degrees of freedom
## Multiple R-squared:  0.1776, Adjusted R-squared:  0.07478 
## F-statistic: 1.727 on 2 and 16 DF,  p-value: 0.2093
# Two-predictor regression of Y on x1 and x6.
lmm5 <- lm(formula = Y ~ x1 + x6, data = dat)
summary(lmm5)
## 
## Call:
## lm(formula = Y ~ x1 + x6, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7009 -1.0257 -0.1130  0.5588  2.6099 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   7.1011     7.4829   0.949    0.357
## x1            0.9066     0.8638   1.049    0.310
## x6           -1.1439     2.0187  -0.567    0.579
## 
## Residual standard error: 1.35 on 16 degrees of freedom
## Multiple R-squared:  0.122,  Adjusted R-squared:  0.01224 
## F-statistic: 1.112 on 2 and 16 DF,  p-value: 0.3532
# Two-predictor regression of Y on x2 and x3.
lmm6 <- lm(formula = Y ~ x2 + x3, data = dat)
summary(lmm6)
## 
## Call:
## lm(formula = Y ~ x2 + x3, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.79435 -0.56978 -0.07551  0.81264  1.58331 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.0122     6.6277  -0.304 0.765341    
## x2           -0.3601     0.2423  -1.486 0.156675    
## x3            0.5937     0.1455   4.082 0.000869 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9766 on 16 degrees of freedom
## Multiple R-squared:  0.5407, Adjusted R-squared:  0.4832 
## F-statistic: 9.416 on 2 and 16 DF,  p-value: 0.001982
# Two-predictor regression of Y on x2 and x4.
lmm7 <- lm(formula = Y ~ x2 + x4, data = dat)
summary(lmm7)
## 
## Call:
## lm(formula = Y ~ x2 + x4, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6410 -0.9396 -0.1690  0.8821  1.9002 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 24.620114   6.565670   3.750  0.00175 **
## x2          -0.001634   0.331669  -0.005  0.99613   
## x4          -0.163286   0.068001  -2.401  0.02885 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.196 on 16 degrees of freedom
## Multiple R-squared:  0.3107, Adjusted R-squared:  0.2246 
## F-statistic: 3.607 on 2 and 16 DF,  p-value: 0.05094
# Two-predictor regression of Y on x2 and x5.
lmm8 <- lm(formula = Y ~ x2 + x5, data = dat)
summary(lmm8)
## 
## Call:
## lm(formula = Y ~ x2 + x5, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.03599 -0.88391  0.01486  1.04449  2.12839 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  21.2062     7.6023   2.789   0.0131 *
## x2           -0.1695     0.3793  -0.447   0.6609  
## x5           -3.7370     3.5517  -1.052   0.3084  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.349 on 16 degrees of freedom
## Multiple R-squared:  0.123,  Adjusted R-squared:  0.0134 
## F-statistic: 1.122 on 2 and 16 DF,  p-value: 0.3499
# Two-predictor regression of Y on x2 and x6.
lmm9 <- lm(formula = Y ~ x2 + x6, data = dat)
summary(lmm9)
## 
## Call:
## lm(formula = Y ~ x2 + x6, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9498 -0.9500 -0.2198  1.0097  2.2201 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  19.1531     7.0059   2.734   0.0147 *
## x2           -0.2998     0.3449  -0.869   0.3975  
## x6           -1.6483     1.9142  -0.861   0.4019  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.364 on 16 degrees of freedom
## Multiple R-squared:  0.1039, Adjusted R-squared:  -0.008145 
## F-statistic: 0.9273 on 2 and 16 DF,  p-value: 0.4159
# Two-predictor regression of Y on x3 and x4.
lmm10 <- lm(formula = Y ~ x3 + x4, data = dat)
summary(lmm10)
## 
## Call:
## lm(formula = Y ~ x3 + x4, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.78402 -0.51140 -0.00655  0.45487  1.51206 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  4.65631    6.39482   0.728  0.47705   
## x3           0.51133    0.13075   3.911  0.00125 **
## x4          -0.12418    0.04466  -2.781  0.01336 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8554 on 16 degrees of freedom
## Multiple R-squared:  0.6476, Adjusted R-squared:  0.6035 
## F-statistic:  14.7 on 2 and 16 DF,  p-value: 0.0002379
# Two-predictor regression of Y on x3 and x5.
lmm11 <- lm(formula = Y ~ x3 + x5, data = dat)
summary(lmm11)
## 
## Call:
## lm(formula = Y ~ x3 + x5, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8769 -0.5410  0.1714  0.7238  1.3841 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.04954    6.56674  -0.008 0.994074    
## x3           0.58239    0.14059   4.142 0.000766 ***
## x5          -4.11565    2.19309  -1.877 0.078927 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9432 on 16 degrees of freedom
## Multiple R-squared:  0.5716, Adjusted R-squared:  0.518 
## F-statistic: 10.67 on 2 and 16 DF,  p-value: 0.001136
# Two-predictor regression of Y on x3 and x6.
lmm12 <- lm(formula = Y ~ x3 + x6, data = dat)
summary(lmm12)
## 
## Call:
## lm(formula = Y ~ x3 + x6, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0622 -0.6179 -0.1878  0.8726  1.5116 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -6.7399     6.3192  -1.067  0.30200   
## x3            0.5738     0.1573   3.647  0.00217 **
## x6           -0.8272     1.4544  -0.569  0.57743   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.031 on 16 degrees of freedom
## Multiple R-squared:  0.4876, Adjusted R-squared:  0.4236 
## F-statistic: 7.613 on 2 and 16 DF,  p-value: 0.004752
# Two-predictor regression of Y on x4 and x5.
lmm13 <- lm(formula = Y ~ x4 + x5, data = dat)
summary(lmm13)
## 
## Call:
## lm(formula = Y ~ x4 + x5, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.63156 -0.83559 -0.08159  0.88298  1.95448 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  26.9245     6.6235   4.065   0.0009 ***
## x4           -0.1479     0.0657  -2.250   0.0389 *  
## x5           -1.7768     3.0009  -0.592   0.5621    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.183 on 16 degrees of freedom
## Multiple R-squared:  0.3255, Adjusted R-squared:  0.2412 
## F-statistic: 3.861 on 2 and 16 DF,  p-value: 0.04283
# Two-predictor regression of Y on x4 and x6.
lmm14 <- lm(formula = Y ~ x4 + x6, data = dat)
summary(lmm14)
## 
## Call:
## lm(formula = Y ~ x4 + x6, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6931 -0.9068 -0.1607  0.9014  1.8618 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 24.81928    5.52822   4.490 0.000371 ***
## x4          -0.15896    0.06585  -2.414 0.028129 *  
## x6          -0.31520    1.78265  -0.177 0.861870    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.195 on 16 degrees of freedom
## Multiple R-squared:  0.3121, Adjusted R-squared:  0.2261 
## F-statistic: 3.629 on 2 and 16 DF,  p-value: 0.05015
# Two-predictor regression of Y on x5 and x6.
lmm15 <- lm(formula = Y ~ x5 + x6, data = dat)
summary(lmm15)
## 
## Call:
## lm(formula = Y ~ x5 + x6, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9221 -0.6816 -0.2220  1.0413  2.0779 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   19.919      6.594   3.021  0.00812 **
## x5            -3.747      3.467  -1.081  0.29572   
## x6            -1.015      2.055  -0.494  0.62789   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.348 on 16 degrees of freedom
## Multiple R-squared:  0.1254, Adjusted R-squared:  0.0161 
## F-statistic: 1.147 on 2 and 16 DF,  p-value: 0.3423

e-liður

# Full model: Y regressed on all six predictors at once.
m_full <- lm(
  formula = Y ~ x1 + x2 + x3 + x4 + x5 + x6,
  data = dat
)
summary(m_full)
## 
## Call:
## lm(formula = Y ~ x1 + x2 + x3 + x4 + x5 + x6, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.19036 -0.39392  0.00724  0.41801  1.34725 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  3.13682    8.10979   0.387  0.70568   
## x1           0.64443    0.58892   1.094  0.29532   
## x2          -0.01042    0.26765  -0.039  0.96959   
## x3           0.50465    0.14234   3.545  0.00403 **
## x4          -0.11967    0.05623  -2.128  0.05475 . 
## x5          -2.46177    2.59776  -0.948  0.36200   
## x6           1.50441    1.51936   0.990  0.34164   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8941 on 12 degrees of freedom
## Multiple R-squared:  0.7113, Adjusted R-squared:  0.5669 
## F-statistic: 4.927 on 6 and 12 DF,  p-value: 0.00921

f-liður

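Bera saman líkönin úr c-, d- og e-lið og velja það besta. Af líkönunum sem voru mátuð hefur Y ~ x3 + x4 hæsta leiðrétta R² (0.6035), samanborið við 0.4465 fyrir besta einnar breytu líkanið (Y ~ x3) og 0.5669 fyrir fulla líkanið.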

g-liður

# Stepwise model selection: start from the intercept-only model and let step()
# add or drop terms in both directions, bounded below by the intercept-only
# model and above by the full six-predictor model. trace = 0 silences the
# per-step log.
m0 <- lm(formula = Y ~ 1, data = dat)
mstep <- step(
  object = m0,
  scope = list(lower = m0, upper = m_full),
  direction = "both",
  trace = 0
)
summary(mstep)
## 
## Call:
## lm(formula = Y ~ x3 + x4, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.78402 -0.51140 -0.00655  0.45487  1.51206 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  4.65631    6.39482   0.728  0.47705   
## x3           0.51133    0.13075   3.911  0.00125 **
## x4          -0.12418    0.04466  -2.781  0.01336 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8554 on 16 degrees of freedom
## Multiple R-squared:  0.6476, Adjusted R-squared:  0.6035 
## F-statistic:  14.7 on 2 and 16 DF,  p-value: 0.0002379

h-liður

# Draw the diagnostic plots for mstep in a 2x2 grid. par() returns the
# previous settings, so restoring them puts back whatever layout was active
# before this section instead of assuming it was a single 1x1 panel.
old_par <- par(mfrow = c(2, 2))
plot(mstep)

par(old_par)

i-liður

# New observation to predict for; it supplies a value for every predictor.
newx <- data.frame(x1 = 11, x2 = 18, x3 = 31, x4 = 100, x5 = 2, x6 = 2)

# Point prediction from the stepwise-selected model with a 95% prediction
# interval (columns fit, lwr, upr).
pred <- predict(
  object = mstep,
  newdata = newx,
  interval = "prediction",
  level = 0.95
)
pred
##        fit      lwr      upr
## 1 8.089072 5.947112 10.23103