linear regresssion with lm

Author

kirit ved

linear regression with r

setting r environment

rm(list=ls(all.names = T))
#setwd("d:/met/met_lect3")
r_start=function(){
  rm(list=ls(all.names=T))
  setwd("d:/met/met_lect3")
  set.seed(1234)
 
  if(!require(pacman)) {
    install.packages(pacman)
  }
  library(pacman)
   pacman::p_loaded()
   
}
r_end=function(){
  p_unload(all)
  
  rm(list=ls(all.names=T))
}
p="d:/met/met_lect3"
q=5678

creating dataframe

#| echo: true
#| label: l1
#| warning: false
#| message: false
# https://rpubs.com/kiritved/lm1234
r_start()
Loading required package: pacman
[1] "pacman"
pacman::p_load(tidyverse)
n=10
x=1:n
y=23.67*x+54.78+runif(n,-50,50)
d=data.frame(x,y)
d
    x         y
1   1  39.82034
2   2 114.34994
3   3 136.71747
4   4 161.79794
5   5 209.22154
6   6 210.83106
7   7 171.41958
8   8 217.39505
9   9 284.41838
10 10 292.90511

checking correlation

#| warning: false
#| label: l2
#| message: false
cor(x,y)
[1] 0.9335673

fitting values

mylm=lm(y~x,d);mylm;summary(mylm);str(mylm)

Call:
lm(formula = y ~ x, data = d)

Coefficients:
(Intercept)            x  
      53.82        23.65  

Call:
lm(formula = y ~ x, data = d)

Residuals:
   Min     1Q Median     3Q    Max 
-47.94 -18.56  12.59  14.69  37.16 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)    53.82      19.92   2.702    0.027 *  
x              23.65       3.21   7.367 7.86e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 29.16 on 8 degrees of freedom
Multiple R-squared:  0.8715,    Adjusted R-squared:  0.8555 
F-statistic: 54.28 on 1 and 8 DF,  p-value: 7.861e-05
List of 12
 $ coefficients : Named num [1:2] 53.8 23.6
  ..- attr(*, "names")= chr [1:2] "(Intercept)" "x"
 $ residuals    : Named num [1:10] -37.6 13.2 12 13.4 37.2 ...
  ..- attr(*, "names")= chr [1:10] "1" "2" "3" "4" ...
 $ effects      : Named num [1:10] -581.5 214.8 18.2 21.6 47.3 ...
  ..- attr(*, "names")= chr [1:10] "(Intercept)" "x" "" "" ...
 $ rank         : int 2
 $ fitted.values: Named num [1:10] 77.5 101.1 124.8 148.4 172.1 ...
  ..- attr(*, "names")= chr [1:10] "1" "2" "3" "4" ...
 $ assign       : int [1:2] 0 1
 $ qr           :List of 5
  ..$ qr   : num [1:10, 1:2] -3.162 0.316 0.316 0.316 0.316 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:10] "1" "2" "3" "4" ...
  .. .. ..$ : chr [1:2] "(Intercept)" "x"
  .. ..- attr(*, "assign")= int [1:2] 0 1
  ..$ qraux: num [1:2] 1.32 1.27
  ..$ pivot: int [1:2] 1 2
  ..$ tol  : num 1e-07
  ..$ rank : int 2
  ..- attr(*, "class")= chr "qr"
 $ df.residual  : int 8
 $ xlevels      : Named list()
 $ call         : language lm(formula = y ~ x, data = d)
 $ terms        :Classes 'terms', 'formula'  language y ~ x
  .. ..- attr(*, "variables")= language list(y, x)
  .. ..- attr(*, "factors")= int [1:2, 1] 0 1
  .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. ..$ : chr [1:2] "y" "x"
  .. .. .. ..$ : chr "x"
  .. ..- attr(*, "term.labels")= chr "x"
  .. ..- attr(*, "order")= int 1
  .. ..- attr(*, "intercept")= int 1
  .. ..- attr(*, "response")= int 1
  .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
  .. ..- attr(*, "predvars")= language list(y, x)
  .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
  .. .. ..- attr(*, "names")= chr [1:2] "y" "x"
 $ model        :'data.frame':  10 obs. of  2 variables:
  ..$ y: num [1:10] 39.8 114.3 136.7 161.8 209.2 ...
  ..$ x: int [1:10] 1 2 3 4 5 6 7 8 9 10
  ..- attr(*, "terms")=Classes 'terms', 'formula'  language y ~ x
  .. .. ..- attr(*, "variables")= language list(y, x)
  .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
  .. .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. .. ..$ : chr [1:2] "y" "x"
  .. .. .. .. ..$ : chr "x"
  .. .. ..- attr(*, "term.labels")= chr "x"
  .. .. ..- attr(*, "order")= int 1
  .. .. ..- attr(*, "intercept")= int 1
  .. .. ..- attr(*, "response")= int 1
  .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
  .. .. ..- attr(*, "predvars")= language list(y, x)
  .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
  .. .. .. ..- attr(*, "names")= chr [1:2] "y" "x"
 - attr(*, "class")= chr "lm"
plot(x,y,col="lightgreen",lwd=5,pch=23,main="plot of y vs x",xlab="x",ylab="y",type="b")
lines(d$x,mylm$fitted.values)

x1=c(7.3,2.9,6.78)
dpred=data.frame(x=x1)
dpred
     x
1 7.30
2 2.90
3 6.78
ypred=predict(mylm,dpred,add=T)
ypred
       1        2        3 
226.4561 122.3999 214.1585 
dpred$ypred=ypred
dpred
     x    ypred
1 7.30 226.4561
2 2.90 122.3999
3 6.78 214.1585

multivariate regression using lm

creating data frame

n=25
x1=runif(n,-50,50)
x2=rnorm(n,50,20)
x3=runif(n,-100,100)
y=3*x1+4*x2+5*x3+56+runif(n,-100,100)
d=data.frame(x1,x2,x3,y)
d
           x1        x2         x3          y
1   19.359129 64.104869  79.716098  811.73767
2    4.497484 37.059620 -22.100043   27.38452
3  -21.726642 67.363617 -37.825844  161.20631
4   42.343348 57.512712 -67.994267   -2.52689
5  -20.768416 56.205243  79.237170  558.63290
6   33.729563 50.100139 -66.721244  106.60058
7  -21.377672 49.247395  80.084919  678.45178
8  -23.317922 64.479521 -73.184361 -166.12624
9  -31.327721 40.065223 -73.677173 -321.41392
10 -26.777409 50.227903 -78.942499 -158.70102
11 -18.338755 50.197199   2.316716  262.21156
12 -19.730663 63.565428 -39.960189  134.46362
13 -34.095400 70.591261 -94.656621 -138.28461
14 -46.000408 15.409430 -38.070514 -122.24393
15 -28.120046  5.913038  48.423931  234.63875
16  31.059855 60.863458 -92.908655 -115.21797
17   2.569755 50.428544  13.015222  280.80869
18  41.465817 53.251316 -43.948445  174.31152
19  33.134505 74.835085 -59.160737  158.33341
20 -45.422974 65.655550 -73.252220 -220.21866
21  -4.390852 50.962414 -34.863615  164.80359
22 -23.481333 20.491997 -68.987606 -250.59417
23 -19.532780 58.715246 -74.007572 -212.28854
24   0.730687 48.590596 -12.893788  172.69491
25 -31.890379 52.223931 -92.271469 -209.26938
cor(d)
            x1         x2           x3         y
x1 1.000000000  0.2862297  0.003421378 0.2964186
x2 0.286229730  1.0000000 -0.209364902 0.1121923
x3 0.003421378 -0.2093649  1.000000000 0.9020780
y  0.296418603  0.1121923  0.902078036 1.0000000
mylm=lm(y~.,d)
mylm

Call:
lm(formula = y ~ ., data = d)

Coefficients:
(Intercept)           x1           x2           x3  
     49.356        2.379        4.325        5.054  
summary(mylm)

Call:
lm(formula = y ~ ., data = d)

Residuals:
   Min     1Q Median     3Q    Max 
-97.16 -58.54  15.95  59.09  97.53 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  49.3557    47.1168   1.048 0.306768    
x1            2.3790     0.5265   4.518 0.000188 ***
x2            4.3250     0.8760   4.937 6.96e-05 ***
x3            5.0540     0.2554  19.790 4.63e-15 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 67.2 on 21 degrees of freedom
Multiple R-squared:  0.9536,    Adjusted R-squared:  0.947 
F-statistic: 143.9 on 3 and 21 DF,  p-value: 3.674e-14
str(mylm)
List of 12
 $ coefficients : Named num [1:4] 49.36 2.38 4.32 5.05
  ..- attr(*, "names")= chr [1:4] "(Intercept)" "x1" "x2" "x3"
 $ residuals    : Named num [1:25] 36.2 -81.3 63.4 -57.7 -84.9 ...
  ..- attr(*, "names")= chr [1:25] "1" "2" "3" "4" ...
 $ effects      : Named num [1:25] -401.9 -423.9 -40.8 1329.9 -114.5 ...
  ..- attr(*, "names")= chr [1:25] "(Intercept)" "x1" "x2" "x3" ...
 $ rank         : int 4
 $ fitted.values: Named num [1:25] 775.5 108.6 97.8 55.2 643.5 ...
  ..- attr(*, "names")= chr [1:25] "1" "2" "3" "4" ...
 $ assign       : int [1:4] 0 1 2 3
 $ qr           :List of 5
  ..$ qr   : num [1:25, 1:4] -5 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:25] "1" "2" "3" "4" ...
  .. .. ..$ : chr [1:4] "(Intercept)" "x1" "x2" "x3"
  .. ..- attr(*, "assign")= int [1:4] 0 1 2 3
  ..$ qraux: num [1:4] 1.2 1.06 1.19 1.19
  ..$ pivot: int [1:4] 1 2 3 4
  ..$ tol  : num 1e-07
  ..$ rank : int 4
  ..- attr(*, "class")= chr "qr"
 $ df.residual  : int 21
 $ xlevels      : Named list()
 $ call         : language lm(formula = y ~ ., data = d)
 $ terms        :Classes 'terms', 'formula'  language y ~ x1 + x2 + x3
  .. ..- attr(*, "variables")= language list(y, x1, x2, x3)
  .. ..- attr(*, "factors")= int [1:4, 1:3] 0 1 0 0 0 0 1 0 0 0 ...
  .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. ..$ : chr [1:4] "y" "x1" "x2" "x3"
  .. .. .. ..$ : chr [1:3] "x1" "x2" "x3"
  .. ..- attr(*, "term.labels")= chr [1:3] "x1" "x2" "x3"
  .. ..- attr(*, "order")= int [1:3] 1 1 1
  .. ..- attr(*, "intercept")= int 1
  .. ..- attr(*, "response")= int 1
  .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
  .. ..- attr(*, "predvars")= language list(y, x1, x2, x3)
  .. ..- attr(*, "dataClasses")= Named chr [1:4] "numeric" "numeric" "numeric" "numeric"
  .. .. ..- attr(*, "names")= chr [1:4] "y" "x1" "x2" "x3"
 $ model        :'data.frame':  25 obs. of  4 variables:
  ..$ y : num [1:25] 811.74 27.38 161.21 -2.53 558.63 ...
  ..$ x1: num [1:25] 19.4 4.5 -21.7 42.3 -20.8 ...
  ..$ x2: num [1:25] 64.1 37.1 67.4 57.5 56.2 ...
  ..$ x3: num [1:25] 79.7 -22.1 -37.8 -68 79.2 ...
  ..- attr(*, "terms")=Classes 'terms', 'formula'  language y ~ x1 + x2 + x3
  .. .. ..- attr(*, "variables")= language list(y, x1, x2, x3)
  .. .. ..- attr(*, "factors")= int [1:4, 1:3] 0 1 0 0 0 0 1 0 0 0 ...
  .. .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. .. ..$ : chr [1:4] "y" "x1" "x2" "x3"
  .. .. .. .. ..$ : chr [1:3] "x1" "x2" "x3"
  .. .. ..- attr(*, "term.labels")= chr [1:3] "x1" "x2" "x3"
  .. .. ..- attr(*, "order")= int [1:3] 1 1 1
  .. .. ..- attr(*, "intercept")= int 1
  .. .. ..- attr(*, "response")= int 1
  .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
  .. .. ..- attr(*, "predvars")= language list(y, x1, x2, x3)
  .. .. ..- attr(*, "dataClasses")= Named chr [1:4] "numeric" "numeric" "numeric" "numeric"
  .. .. .. ..- attr(*, "names")= chr [1:4] "y" "x1" "x2" "x3"
 - attr(*, "class")= chr "lm"

deciding input variable values to predict y

k=3
x1=runif(k,min(x1),max(x1))
x2=runif(k,min(x2),max(x2))
x3=runif(k,min(x3),max(x3))
dpred=data.frame(x1,x2,x3)
dpred
         x1       x2        x3
1 -4.673876 49.45405  77.22529
2 34.230666 65.81721 -37.97291
3  6.806479 40.56358 -10.54042
ypred=predict(mylm,dpred)
ypred
       1        2        3 
642.4215 223.5317 187.7134 
dpred$ypred=ypred
dpred
         x1       x2        x3    ypred
1 -4.673876 49.45405  77.22529 642.4215
2 34.230666 65.81721 -37.97291 223.5317
3  6.806479 40.56358 -10.54042 187.7134

closing r environment

r_end()
The following packages have been unloaded:
lubridate, forcats, stringr, dplyr, purrr, readr, tidyr, tibble, ggplot2, tidyverse, pacman