rm(list=ls(all.names = T))
#setwd("d:/met/met_lect3")
=function(){
r_startrm(list=ls(all.names=T))
setwd("d:/met/met_lect3")
set.seed(1234)
if(!require(pacman)) {
install.packages(pacman)
}library(pacman)
::p_loaded()
pacman
}=function(){
r_endp_unload(all)
rm(list=ls(all.names=T))
}="d:/met/met_lect3"
p=5678 q
Linear Regresssion with lm in R
2 linear regression with r
2.1 setting r environment
2.2 creating dataframe
#| echo: true
#| warning: false
#| message: false
# https://rpubs.com/kiritved/lm1234
r_start()
Loading required package: pacman
[1] "pacman"
::p_load(tidyverse)
pacman=10
n=1:n
x=23.67*x+54.78+runif(n,-50,50)
y=data.frame(x,y)
d d
x y
1 1 39.82034
2 2 114.34994
3 3 136.71747
4 4 161.79794
5 5 209.22154
6 6 210.83106
7 7 171.41958
8 8 217.39505
9 9 284.41838
10 10 292.90511
2.3 checking correlation
#| warning: false
#| message: false
cor(x,y)
[1] 0.9335673
2.4 fitting values
=lm(y~x,d);mylm;summary(mylm);str(mylm) mylm
Call:
lm(formula = y ~ x, data = d)
Coefficients:
(Intercept) x
53.82 23.65
Call:
lm(formula = y ~ x, data = d)
Residuals:
Min 1Q Median 3Q Max
-47.94 -18.56 12.59 14.69 37.16
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 53.82 19.92 2.702 0.027 *
x 23.65 3.21 7.367 7.86e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 29.16 on 8 degrees of freedom
Multiple R-squared: 0.8715, Adjusted R-squared: 0.8555
F-statistic: 54.28 on 1 and 8 DF, p-value: 7.861e-05
List of 12
$ coefficients : Named num [1:2] 53.8 23.6
..- attr(*, "names")= chr [1:2] "(Intercept)" "x"
$ residuals : Named num [1:10] -37.6 13.2 12 13.4 37.2 ...
..- attr(*, "names")= chr [1:10] "1" "2" "3" "4" ...
$ effects : Named num [1:10] -581.5 214.8 18.2 21.6 47.3 ...
..- attr(*, "names")= chr [1:10] "(Intercept)" "x" "" "" ...
$ rank : int 2
$ fitted.values: Named num [1:10] 77.5 101.1 124.8 148.4 172.1 ...
..- attr(*, "names")= chr [1:10] "1" "2" "3" "4" ...
$ assign : int [1:2] 0 1
$ qr :List of 5
..$ qr : num [1:10, 1:2] -3.162 0.316 0.316 0.316 0.316 ...
.. ..- attr(*, "dimnames")=List of 2
.. .. ..$ : chr [1:10] "1" "2" "3" "4" ...
.. .. ..$ : chr [1:2] "(Intercept)" "x"
.. ..- attr(*, "assign")= int [1:2] 0 1
..$ qraux: num [1:2] 1.32 1.27
..$ pivot: int [1:2] 1 2
..$ tol : num 1e-07
..$ rank : int 2
..- attr(*, "class")= chr "qr"
$ df.residual : int 8
$ xlevels : Named list()
$ call : language lm(formula = y ~ x, data = d)
$ terms :Classes 'terms', 'formula' language y ~ x
.. ..- attr(*, "variables")= language list(y, x)
.. ..- attr(*, "factors")= int [1:2, 1] 0 1
.. .. ..- attr(*, "dimnames")=List of 2
.. .. .. ..$ : chr [1:2] "y" "x"
.. .. .. ..$ : chr "x"
.. ..- attr(*, "term.labels")= chr "x"
.. ..- attr(*, "order")= int 1
.. ..- attr(*, "intercept")= int 1
.. ..- attr(*, "response")= int 1
.. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
.. ..- attr(*, "predvars")= language list(y, x)
.. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
.. .. ..- attr(*, "names")= chr [1:2] "y" "x"
$ model :'data.frame': 10 obs. of 2 variables:
..$ y: num [1:10] 39.8 114.3 136.7 161.8 209.2 ...
..$ x: int [1:10] 1 2 3 4 5 6 7 8 9 10
..- attr(*, "terms")=Classes 'terms', 'formula' language y ~ x
.. .. ..- attr(*, "variables")= language list(y, x)
.. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
.. .. .. ..- attr(*, "dimnames")=List of 2
.. .. .. .. ..$ : chr [1:2] "y" "x"
.. .. .. .. ..$ : chr "x"
.. .. ..- attr(*, "term.labels")= chr "x"
.. .. ..- attr(*, "order")= int 1
.. .. ..- attr(*, "intercept")= int 1
.. .. ..- attr(*, "response")= int 1
.. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
.. .. ..- attr(*, "predvars")= language list(y, x)
.. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
.. .. .. ..- attr(*, "names")= chr [1:2] "y" "x"
- attr(*, "class")= chr "lm"
plot(x,y,col="lightgreen",lwd=5,pch=23,main="plot of y vs x",xlab="x",ylab="y",type="b")
lines(d$x,mylm$fitted.values)
=c(7.3,2.9,6.78)
x1=data.frame(x=x1)
dpred dpred
x
1 7.30
2 2.90
3 6.78
=predict(mylm,dpred,add=T)
ypred ypred
1 2 3
226.4561 122.3999 214.1585
$ypred=ypred
dpred dpred
x ypred
1 7.30 226.4561
2 2.90 122.3999
3 6.78 214.1585
3 multivariate regression using lm
3.1 creating data frame
=25
n=runif(n,-50,50)
x1=rnorm(n,50,20)
x2=runif(n,-100,100)
x3=3*x1+4*x2+5*x3+56+runif(n,-100,100)
y=data.frame(x1,x2,x3,y)
d d
x1 x2 x3 y
1 19.359129 64.104869 79.716098 811.73767
2 4.497484 37.059620 -22.100043 27.38452
3 -21.726642 67.363617 -37.825844 161.20631
4 42.343348 57.512712 -67.994267 -2.52689
5 -20.768416 56.205243 79.237170 558.63290
6 33.729563 50.100139 -66.721244 106.60058
7 -21.377672 49.247395 80.084919 678.45178
8 -23.317922 64.479521 -73.184361 -166.12624
9 -31.327721 40.065223 -73.677173 -321.41392
10 -26.777409 50.227903 -78.942499 -158.70102
11 -18.338755 50.197199 2.316716 262.21156
12 -19.730663 63.565428 -39.960189 134.46362
13 -34.095400 70.591261 -94.656621 -138.28461
14 -46.000408 15.409430 -38.070514 -122.24393
15 -28.120046 5.913038 48.423931 234.63875
16 31.059855 60.863458 -92.908655 -115.21797
17 2.569755 50.428544 13.015222 280.80869
18 41.465817 53.251316 -43.948445 174.31152
19 33.134505 74.835085 -59.160737 158.33341
20 -45.422974 65.655550 -73.252220 -220.21866
21 -4.390852 50.962414 -34.863615 164.80359
22 -23.481333 20.491997 -68.987606 -250.59417
23 -19.532780 58.715246 -74.007572 -212.28854
24 0.730687 48.590596 -12.893788 172.69491
25 -31.890379 52.223931 -92.271469 -209.26938
cor(d)
x1 x2 x3 y
x1 1.000000000 0.2862297 0.003421378 0.2964186
x2 0.286229730 1.0000000 -0.209364902 0.1121923
x3 0.003421378 -0.2093649 1.000000000 0.9020780
y 0.296418603 0.1121923 0.902078036 1.0000000
=lm(y~.,d)
mylm mylm
Call:
lm(formula = y ~ ., data = d)
Coefficients:
(Intercept) x1 x2 x3
49.356 2.379 4.325 5.054
summary(mylm)
Call:
lm(formula = y ~ ., data = d)
Residuals:
Min 1Q Median 3Q Max
-97.16 -58.54 15.95 59.09 97.53
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 49.3557 47.1168 1.048 0.306768
x1 2.3790 0.5265 4.518 0.000188 ***
x2 4.3250 0.8760 4.937 6.96e-05 ***
x3 5.0540 0.2554 19.790 4.63e-15 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 67.2 on 21 degrees of freedom
Multiple R-squared: 0.9536, Adjusted R-squared: 0.947
F-statistic: 143.9 on 3 and 21 DF, p-value: 3.674e-14
str(mylm)
List of 12
$ coefficients : Named num [1:4] 49.36 2.38 4.32 5.05
..- attr(*, "names")= chr [1:4] "(Intercept)" "x1" "x2" "x3"
$ residuals : Named num [1:25] 36.2 -81.3 63.4 -57.7 -84.9 ...
..- attr(*, "names")= chr [1:25] "1" "2" "3" "4" ...
$ effects : Named num [1:25] -401.9 -423.9 -40.8 1329.9 -114.5 ...
..- attr(*, "names")= chr [1:25] "(Intercept)" "x1" "x2" "x3" ...
$ rank : int 4
$ fitted.values: Named num [1:25] 775.5 108.6 97.8 55.2 643.5 ...
..- attr(*, "names")= chr [1:25] "1" "2" "3" "4" ...
$ assign : int [1:4] 0 1 2 3
$ qr :List of 5
..$ qr : num [1:25, 1:4] -5 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 ...
.. ..- attr(*, "dimnames")=List of 2
.. .. ..$ : chr [1:25] "1" "2" "3" "4" ...
.. .. ..$ : chr [1:4] "(Intercept)" "x1" "x2" "x3"
.. ..- attr(*, "assign")= int [1:4] 0 1 2 3
..$ qraux: num [1:4] 1.2 1.06 1.19 1.19
..$ pivot: int [1:4] 1 2 3 4
..$ tol : num 1e-07
..$ rank : int 4
..- attr(*, "class")= chr "qr"
$ df.residual : int 21
$ xlevels : Named list()
$ call : language lm(formula = y ~ ., data = d)
$ terms :Classes 'terms', 'formula' language y ~ x1 + x2 + x3
.. ..- attr(*, "variables")= language list(y, x1, x2, x3)
.. ..- attr(*, "factors")= int [1:4, 1:3] 0 1 0 0 0 0 1 0 0 0 ...
.. .. ..- attr(*, "dimnames")=List of 2
.. .. .. ..$ : chr [1:4] "y" "x1" "x2" "x3"
.. .. .. ..$ : chr [1:3] "x1" "x2" "x3"
.. ..- attr(*, "term.labels")= chr [1:3] "x1" "x2" "x3"
.. ..- attr(*, "order")= int [1:3] 1 1 1
.. ..- attr(*, "intercept")= int 1
.. ..- attr(*, "response")= int 1
.. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
.. ..- attr(*, "predvars")= language list(y, x1, x2, x3)
.. ..- attr(*, "dataClasses")= Named chr [1:4] "numeric" "numeric" "numeric" "numeric"
.. .. ..- attr(*, "names")= chr [1:4] "y" "x1" "x2" "x3"
$ model :'data.frame': 25 obs. of 4 variables:
..$ y : num [1:25] 811.74 27.38 161.21 -2.53 558.63 ...
..$ x1: num [1:25] 19.4 4.5 -21.7 42.3 -20.8 ...
..$ x2: num [1:25] 64.1 37.1 67.4 57.5 56.2 ...
..$ x3: num [1:25] 79.7 -22.1 -37.8 -68 79.2 ...
..- attr(*, "terms")=Classes 'terms', 'formula' language y ~ x1 + x2 + x3
.. .. ..- attr(*, "variables")= language list(y, x1, x2, x3)
.. .. ..- attr(*, "factors")= int [1:4, 1:3] 0 1 0 0 0 0 1 0 0 0 ...
.. .. .. ..- attr(*, "dimnames")=List of 2
.. .. .. .. ..$ : chr [1:4] "y" "x1" "x2" "x3"
.. .. .. .. ..$ : chr [1:3] "x1" "x2" "x3"
.. .. ..- attr(*, "term.labels")= chr [1:3] "x1" "x2" "x3"
.. .. ..- attr(*, "order")= int [1:3] 1 1 1
.. .. ..- attr(*, "intercept")= int 1
.. .. ..- attr(*, "response")= int 1
.. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
.. .. ..- attr(*, "predvars")= language list(y, x1, x2, x3)
.. .. ..- attr(*, "dataClasses")= Named chr [1:4] "numeric" "numeric" "numeric" "numeric"
.. .. .. ..- attr(*, "names")= chr [1:4] "y" "x1" "x2" "x3"
- attr(*, "class")= chr "lm"
3.2 deciding input variable values to predict y
=3
k=runif(k,min(x1),max(x1))
x1=runif(k,min(x2),max(x2))
x2=runif(k,min(x3),max(x3))
x3=data.frame(x1,x2,x3)
dpred dpred
x1 x2 x3
1 -4.673876 49.45405 77.22529
2 34.230666 65.81721 -37.97291
3 6.806479 40.56358 -10.54042
=predict(mylm,dpred)
ypred ypred
1 2 3
642.4215 223.5317 187.7134
$ypred=ypred
dpred dpred
x1 x2 x3 ypred
1 -4.673876 49.45405 77.22529 642.4215
2 34.230666 65.81721 -37.97291 223.5317
3 6.806479 40.56358 -10.54042 187.7134
4 closing r environment
r_end()
The following packages have been unloaded:
lubridate, forcats, stringr, dplyr, purrr, readr, tidyr, tibble, ggplot2, tidyverse, pacman