# Session check before any modelling: list the objects in the workspace.
ls()
## character(0)
# Show the current working directory.
getwd()
## [1] "C:/Users/dell/Desktop/regression"
# List the files in the working directory.
dir()
## [1] "reg1.R" "reg1.spin.R" "reg1.spin.Rmd"
## [4] "regression.Rproj"
# Simple linear regression on iris: Sepal.Length modelled by Sepal.Width.
data("iris")
names(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
## [5] "Species"
# Printing the fit directly shows the call and the estimated coefficients.
lm(Sepal.Length ~ Sepal.Width, data = iris)
##
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width, data = iris)
##
## Coefficients:
## (Intercept) Sepal.Width
## 6.5262 -0.2234
# Store the same fit so that its components can be inspected.
a <- lm(Sepal.Length ~ Sepal.Width, data = iris)
names(a)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
class(a)
## [1] "lm"
# Coefficients at full precision, via the standard extractor.
coef(a)
## (Intercept) Sepal.Width
## 6.5262226 -0.2233611
summary(a)
##
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5561 -0.6333 -0.1120 0.5579 2.2226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.5262 0.4789 13.63 <2e-16 ***
## Sepal.Width -0.2234 0.1551 -1.44 0.152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8251 on 148 degrees of freedom
## Multiple R-squared: 0.01382, Adjusted R-squared: 0.007159
## F-statistic: 2.074 on 1 and 148 DF, p-value: 0.1519
# Multiple regression: add both petal measurements as predictors.
b <- lm(
  Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width,
  data = iris
)
names(b)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
class(b)
## [1] "lm"
coef(b)
## (Intercept) Sepal.Width Petal.Length Petal.Width
## 1.8559975 0.6508372 0.7091320 -0.5564827
summary(b)
##
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width,
## data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.82816 -0.21989 0.01875 0.19709 0.84570
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.85600 0.25078 7.401 9.85e-12 ***
## Sepal.Width 0.65084 0.06665 9.765 < 2e-16 ***
## Petal.Length 0.70913 0.05672 12.502 < 2e-16 ***
## Petal.Width -0.55648 0.12755 -4.363 2.41e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3145 on 146 degrees of freedom
## Multiple R-squared: 0.8586, Adjusted R-squared: 0.8557
## F-statistic: 295.5 on 3 and 146 DF, p-value: < 2.2e-16
# Add Species to the previous model. The fit gains a "contrasts" component
# and one coefficient for each of versicolor and virginica.
# NOTE(review): this object shares its name with base::c(). Calls such as
# c(1, 2) still find the function, but a more descriptive name would be safer.
c <- lm(
  Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width + Species,
  data = iris
)
names(c)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "contrasts" "xlevels" "call" "terms"
## [13] "model"
class(c)
## [1] "lm"
coef(c)
## (Intercept) Sepal.Width Petal.Length Petal.Width
## 2.1712663 0.4958889 0.8292439 -0.3151552
## Speciesversicolor Speciesvirginica
## -0.7235620 -1.0234978
summary(c)
##
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width +
## Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.79424 -0.21874 0.00899 0.20255 0.73103
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.17127 0.27979 7.760 1.43e-12 ***
## Sepal.Width 0.49589 0.08607 5.761 4.87e-08 ***
## Petal.Length 0.82924 0.06853 12.101 < 2e-16 ***
## Petal.Width -0.31516 0.15120 -2.084 0.03889 *
## Speciesversicolor -0.72356 0.24017 -3.013 0.00306 **
## Speciesvirginica -1.02350 0.33373 -3.067 0.00258 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3068 on 144 degrees of freedom
## Multiple R-squared: 0.8673, Adjusted R-squared: 0.8627
## F-statistic: 188.3 on 5 and 144 DF, p-value: < 2.2e-16
# mtcars: regress mpg on the ten remaining columns.
data("mtcars")
names(mtcars)
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Every predictor is written out so the printed call lists the terms.
d <- lm(
  mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb,
  data = mtcars
)
summary(d)
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs +
## am + gear + carb, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4506 -1.6044 -0.1196 1.2193 4.6271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.30337 18.71788 0.657 0.5181
## cyl -0.11144 1.04502 -0.107 0.9161
## disp 0.01334 0.01786 0.747 0.4635
## hp -0.02148 0.02177 -0.987 0.3350
## drat 0.78711 1.63537 0.481 0.6353
## wt -3.71530 1.89441 -1.961 0.0633 .
## qsec 0.82104 0.73084 1.123 0.2739
## vs 0.31776 2.10451 0.151 0.8814
## am 2.52023 2.05665 1.225 0.2340
## gear 0.65541 1.49326 0.439 0.6652
## carb -0.19942 0.82875 -0.241 0.8122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared: 0.869, Adjusted R-squared: 0.8066
## F-statistic: 13.93 on 10 and 21 DF, p-value: 3.793e-07
# diamonds: attach ggplot2, load the diamonds data and inspect its structure.
library(ggplot2)
data("diamonds")
str(diamonds)
## Classes 'tbl_df', 'tbl' and 'data.frame': 53940 obs. of 10 variables:
## $ carat : num 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Price per carat, stored as an extra column of diamonds.
diamonds$unitprice <- diamonds$price / diamonds$carat
head(diamonds)
## # A tibble: 6 × 11
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## # ... with 1 more variables: unitprice <dbl>
# Model the price per carat on the table, colour, clarity, cut, x/y/z and
# depth columns.
h <- lm(
  unitprice ~ table + color + clarity + cut + x + y + z + depth,
  data = diamonds
)
summary(h)
##
## Call:
## lm(formula = unitprice ~ table + color + clarity + cut + x +
## y + z + depth, data = diamonds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5166.4 -463.9 -92.6 355.8 17851.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11688.890 260.693 -44.838 < 2e-16 ***
## table 1.875 2.037 0.920 0.35732
## color.L -1885.098 12.044 -156.515 < 2e-16 ***
## color.Q -456.386 11.015 -41.434 < 2e-16 ***
## color.C -78.133 10.305 -7.582 3.45e-14 ***
## color^4 78.474 9.464 8.292 < 2e-16 ***
## color^5 -56.637 8.941 -6.335 2.40e-10 ***
## color^6 -10.800 8.128 -1.329 0.18396
## clarity.L 3794.856 21.175 179.211 < 2e-16 ***
## clarity.Q -1074.105 19.694 -54.540 < 2e-16 ***
## clarity.C 507.278 16.878 30.055 < 2e-16 ***
## clarity^4 -169.564 13.496 -12.564 < 2e-16 ***
## clarity^5 117.177 11.023 10.630 < 2e-16 ***
## clarity^6 47.190 9.598 4.917 8.83e-07 ***
## clarity^7 124.830 8.466 14.745 < 2e-16 ***
## cut.L 487.537 15.727 31.000 < 2e-16 ***
## cut.Q -217.290 12.590 -17.259 < 2e-16 ***
## cut.C 127.423 10.836 11.759 < 2e-16 ***
## cut^4 16.718 8.661 1.930 0.05359 .
## x 1761.626 18.702 94.195 < 2e-16 ***
## y 67.623 13.523 5.001 5.74e-07 ***
## z 67.878 23.435 2.896 0.00378 **
## depth 73.620 3.110 23.671 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 790.9 on 53917 degrees of freedom
## Multiple R-squared: 0.8456, Adjusted R-squared: 0.8456
## F-statistic: 1.343e+04 on 22 and 53917 DF, p-value: < 2.2e-16
# Toy data with missing values: 12 rows of wt and age, three of them
# incomplete. `a` (previously the iris model) is overwritten here.
# The data frame is built in one step instead of growing a list from NULL,
# so both columns are checked to have the same length and `a` is a
# data.frame from the start.
a <- data.frame(
  wt = c(55, 45, 30, 60, 50, 100, 75, 75, 65, NA, NA, NA),
  age = c(18, 11, 7, 14, 13, 41, 38, 36, 43, NA, 68, 62)
)
# lm() drops rows with missing values under the default na.action, so the
# fit uses the nine complete rows only.
c01 <- lm(wt ~ age, data = a)
summary(c01)
##
## Call:
## lm(formula = wt ~ age, data = a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.3557 -2.4762 -0.1243 1.9217 18.9961
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.7914 7.9310 4.135 0.00438 **
## age 1.1759 0.2818 4.173 0.00417 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.63 on 7 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.7133, Adjusted R-squared: 0.6723
## F-statistic: 17.41 on 1 and 7 DF, p-value: 0.004174
# Make sure `a` is a data frame, then show all 12 rows, NAs included.
a <- as.data.frame(a)
a
## wt age
## 1 55 18
## 2 45 11
## 3 30 7
## 4 60 14
## 5 50 13
## 6 100 41
## 7 75 38
## 8 75 36
## 9 65 43
## 10 NA NA
## 11 NA 68
## 12 NA 62
# Drop the incomplete rows explicitly and refit on what remains.
# `b` (previously an iris model) is overwritten here.
b <- na.omit(a)
c1 <- lm(wt ~ age, data = b)
summary(c1)
##
## Call:
## lm(formula = wt ~ age, data = b)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.3557 -2.4762 -0.1243 1.9217 18.9961
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.7914 7.9310 4.135 0.00438 **
## age 1.1759 0.2818 4.173 0.00417 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.63 on 7 degrees of freedom
## Multiple R-squared: 0.7133, Adjusted R-squared: 0.6723
## F-statistic: 17.41 on 1 and 7 DF, p-value: 0.004174
# Work on a copy so that `a` keeps its missing values.
a2 <- a
# Medians of the observed values, used below as replacement values.
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned,
# which would silently change na.rm.
wtmed <- median(a2$wt, na.rm = TRUE)
wtmed
## [1] 60
agemed <- median(a2$age, na.rm = TRUE)
agemed
## [1] 36
a2
## wt age
## 1 55 18
## 2 45 11
## 3 30 7
## 4 60 14
## 5 50 13
## 6 100 41
## 7 75 38
## 8 75 36
## 9 65 43
## 10 NA NA
## 11 NA 68
## 12 NA 62
# Median imputation: overwrite only the missing wt entries with wtmed.
a2$wt[is.na(a2$wt)] <- wtmed
a2
## wt age
## 1 55 18
## 2 45 11
## 3 30 7
## 4 60 14
## 5 50 13
## 6 100 41
## 7 75 38
## 8 75 36
## 9 65 43
## 10 60 NA
## 11 60 68
## 12 60 62
# Median imputation: overwrite only the missing age entries with agemed.
a2$age[is.na(a2$age)] <- agemed
a2
## wt age
## 1 55 18
## 2 45 11
## 3 30 7
## 4 60 14
## 5 50 13
## 6 100 41
## 7 75 38
## 8 75 36
## 9 65 43
## 10 60 36
## 11 60 68
## 12 60 62
# Refit the same model on the median-imputed copy of the data.
c12 <- lm(wt ~ age, data = a2)
summary(c12)
##
## Call:
## lm(formula = wt ~ age, data = a2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.456 -8.866 -1.849 7.737 35.009
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 47.4630 8.9308 5.315 0.00034 ***
## age 0.4275 0.2380 1.796 0.10272
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.81 on 10 degrees of freedom
## Multiple R-squared: 0.2439, Adjusted R-squared: 0.1683
## F-statistic: 3.226 on 1 and 10 DF, p-value: 0.1027