# Session check: objects currently defined, the working directory, and the
# files it contains.
ls()
## character(0)
getwd()
## [1] "C:/Users/dell/Desktop/regression"
list.files()
## [1] "reg1.R"           "reg1.spin.R"      "reg1.spin.Rmd"   
## [4] "regression.Rproj"
# Load the built-in iris data set and list its columns.
data(iris)
colnames(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width" 
## [5] "Species"
# Simple linear regression of sepal length on sepal width.
# The model is fitted once and stored in `a`; printing the stored object gives
# the same Call/Coefficients display as printing the lm() call directly.
a <- lm(Sepal.Length ~ Sepal.Width, data = iris)
a
## 
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width, data = iris)
## 
## Coefficients:
## (Intercept)  Sepal.Width  
##      6.5262      -0.2234
# Components stored in the fitted object, and its class.
names(a)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"
class(a)
## [1] "lm"
# Estimated intercept and slope at full printed precision.
coef(a)
## (Intercept) Sepal.Width 
##   6.5262226  -0.2233611
# Full summary: residual quantiles, coefficient table, R-squared, F-test.
summary(a)
## 
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width, data = iris)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5561 -0.6333 -0.1120  0.5579  2.2226 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.5262     0.4789   13.63   <2e-16 ***
## Sepal.Width  -0.2234     0.1551   -1.44    0.152    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8251 on 148 degrees of freedom
## Multiple R-squared:  0.01382,    Adjusted R-squared:  0.007159 
## F-statistic: 2.074 on 1 and 148 DF,  p-value: 0.1519
# Multiple regression: sepal length on sepal width plus both petal measures.
b <- lm(
  formula = Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width,
  data = iris
)
names(b)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"
class(b)
## [1] "lm"
# Estimated coefficients at full printed precision.
coef(b)
##  (Intercept)  Sepal.Width Petal.Length  Petal.Width 
##    1.8559975    0.6508372    0.7091320   -0.5564827
summary(b)
## 
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width, 
##     data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.82816 -0.21989  0.01875  0.19709  0.84570 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.85600    0.25078   7.401 9.85e-12 ***
## Sepal.Width   0.65084    0.06665   9.765  < 2e-16 ***
## Petal.Length  0.70913    0.05672  12.502  < 2e-16 ***
## Petal.Width  -0.55648    0.12755  -4.363 2.41e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3145 on 146 degrees of freedom
## Multiple R-squared:  0.8586, Adjusted R-squared:  0.8557 
## F-statistic: 295.5 on 3 and 146 DF,  p-value: < 2.2e-16
# Same predictors as the previous model plus the Species factor, which adds
# one coefficient per non-reference level (versicolor, virginica).
# NOTE(review): the fit is stored under the name `c`, which is also the name
# of base::c(); consider renaming once all uses of this object are known.
c <- lm(
  formula = Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width + Species,
  data = iris
)
names(c)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "contrasts"     "xlevels"       "call"          "terms"        
## [13] "model"
class(c)
## [1] "lm"
# Estimated coefficients at full printed precision.
coef(c)
##       (Intercept)       Sepal.Width      Petal.Length       Petal.Width 
##         2.1712663         0.4958889         0.8292439        -0.3151552 
## Speciesversicolor  Speciesvirginica 
##        -0.7235620        -1.0234978
summary(c)
## 
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width + 
##     Species, data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79424 -0.21874  0.00899  0.20255  0.73103 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.17127    0.27979   7.760 1.43e-12 ***
## Sepal.Width        0.49589    0.08607   5.761 4.87e-08 ***
## Petal.Length       0.82924    0.06853  12.101  < 2e-16 ***
## Petal.Width       -0.31516    0.15120  -2.084  0.03889 *  
## Speciesversicolor -0.72356    0.24017  -3.013  0.00306 ** 
## Speciesvirginica  -1.02350    0.33373  -3.067  0.00258 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3068 on 144 degrees of freedom
## Multiple R-squared:  0.8673, Adjusted R-squared:  0.8627 
## F-statistic: 188.3 on 5 and 144 DF,  p-value: < 2.2e-16
# mtcars ----

data(mtcars)

# Column names and structure: 32 rows, 11 numeric columns.
colnames(mtcars)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Regress mpg on every other column, listed explicitly in the formula.
d <- lm(
  formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb,
  data = mtcars
)
summary(d)
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs + 
##     am + gear + carb, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4506 -1.6044 -0.1196  1.2193  4.6271 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 12.30337   18.71788   0.657   0.5181  
## cyl         -0.11144    1.04502  -0.107   0.9161  
## disp         0.01334    0.01786   0.747   0.4635  
## hp          -0.02148    0.02177  -0.987   0.3350  
## drat         0.78711    1.63537   0.481   0.6353  
## wt          -3.71530    1.89441  -1.961   0.0633 .
## qsec         0.82104    0.73084   1.123   0.2739  
## vs           0.31776    2.10451   0.151   0.8814  
## am           2.52023    2.05665   1.225   0.2340  
## gear         0.65541    1.49326   0.439   0.6652  
## carb        -0.19942    0.82875  -0.241   0.8122  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared:  0.869,  Adjusted R-squared:  0.8066 
## F-statistic: 13.93 on 10 and 21 DF,  p-value: 3.793e-07
# diamonds ----

# The diamonds data set ships with ggplot2.
library(ggplot2)
data(diamonds)
str(diamonds)
## Classes 'tbl_df', 'tbl' and 'data.frame':    53940 obs. of  10 variables:
##  $ carat  : num  0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
##  $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
##  $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
##  $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
##  $ depth  : num  61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
##  $ table  : num  55 61 65 58 58 57 57 55 61 61 ...
##  $ price  : int  326 326 327 334 335 336 336 337 337 338 ...
##  $ x      : num  3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
##  $ y      : num  3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
##  $ z      : num  2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Derived response: price per carat.
diamonds$unitprice <- diamonds$price / diamonds$carat
head(diamonds)
## # A tibble: 6 × 11
##   carat       cut color clarity depth table price     x     y     z
##   <dbl>     <ord> <ord>   <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23     Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43
## 2  0.21   Premium     E     SI1  59.8    61   326  3.89  3.84  2.31
## 3  0.23      Good     E     VS1  56.9    65   327  4.05  4.07  2.31
## 4  0.29   Premium     I     VS2  62.4    58   334  4.20  4.23  2.63
## 5  0.31      Good     J     SI2  63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good     J    VVS2  62.8    57   336  3.94  3.96  2.48
## # ... with 1 more variables: unitprice <dbl>
# Regress unit price on the remaining columns except price and carat.
# color, clarity and cut are ordered factors, so their terms appear as
# polynomial contrasts (.L, .Q, .C, ^4, ...) in the coefficient table.
h <- lm(
  formula = unitprice ~ table + color + clarity + cut + x + y + z + depth,
  data = diamonds
)
summary(h)
## 
## Call:
## lm(formula = unitprice ~ table + color + clarity + cut + x + 
##     y + z + depth, data = diamonds)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5166.4  -463.9   -92.6   355.8 17851.4 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -11688.890    260.693  -44.838  < 2e-16 ***
## table            1.875      2.037    0.920  0.35732    
## color.L      -1885.098     12.044 -156.515  < 2e-16 ***
## color.Q       -456.386     11.015  -41.434  < 2e-16 ***
## color.C        -78.133     10.305   -7.582 3.45e-14 ***
## color^4         78.474      9.464    8.292  < 2e-16 ***
## color^5        -56.637      8.941   -6.335 2.40e-10 ***
## color^6        -10.800      8.128   -1.329  0.18396    
## clarity.L     3794.856     21.175  179.211  < 2e-16 ***
## clarity.Q    -1074.105     19.694  -54.540  < 2e-16 ***
## clarity.C      507.278     16.878   30.055  < 2e-16 ***
## clarity^4     -169.564     13.496  -12.564  < 2e-16 ***
## clarity^5      117.177     11.023   10.630  < 2e-16 ***
## clarity^6       47.190      9.598    4.917 8.83e-07 ***
## clarity^7      124.830      8.466   14.745  < 2e-16 ***
## cut.L          487.537     15.727   31.000  < 2e-16 ***
## cut.Q         -217.290     12.590  -17.259  < 2e-16 ***
## cut.C          127.423     10.836   11.759  < 2e-16 ***
## cut^4           16.718      8.661    1.930  0.05359 .  
## x             1761.626     18.702   94.195  < 2e-16 ***
## y               67.623     13.523    5.001 5.74e-07 ***
## z               67.878     23.435    2.896  0.00378 ** 
## depth           73.620      3.110   23.671  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 790.9 on 53917 degrees of freedom
## Multiple R-squared:  0.8456, Adjusted R-squared:  0.8456 
## F-statistic: 1.343e+04 on 22 and 53917 DF,  p-value: < 2.2e-16
# Toy data with missing values: 12 observations of `wt` and `age`.
# Built as a plain list first; lm() accepts a list as its `data` argument.
a <- list(
  wt = c(55, 45, 30, 60, 50, 100, 75, 75, 65, NA, NA, NA),
  age = c(18, 11, 7, 14, 13, 41, 38, 36, 43, NA, 68, 62)
)
# Fit on the raw data: rows containing NA are dropped by lm() itself
# (reported below as "3 observations deleted due to missingness").
c01 <- lm(wt ~ age, data = a)
summary(c01)
## 
## Call:
## lm(formula = wt ~ age, data = a)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.3557  -2.4762  -0.1243   1.9217  18.9961 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  32.7914     7.9310   4.135  0.00438 **
## age           1.1759     0.2818   4.173  0.00417 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.63 on 7 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.7133, Adjusted R-squared:  0.6723 
## F-statistic: 17.41 on 1 and 7 DF,  p-value: 0.004174
# Convert the list to a data frame so rows can be inspected and filtered.
a <- as.data.frame(a)
a
##     wt age
## 1   55  18
## 2   45  11
## 3   30   7
## 4   60  14
## 5   50  13
## 6  100  41
## 7   75  38
## 8   75  36
## 9   65  43
## 10  NA  NA
## 11  NA  68
## 12  NA  62
# Explicit complete-case analysis: remove incomplete rows, then refit.
# The estimates equal those of c01, which excluded the same rows.
b <- na.omit(a)
c1 <- lm(wt ~ age, data = b)
summary(c1)
## 
## Call:
## lm(formula = wt ~ age, data = b)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.3557  -2.4762  -0.1243   1.9217  18.9961 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  32.7914     7.9310   4.135  0.00438 **
## age           1.1759     0.2818   4.173  0.00417 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.63 on 7 degrees of freedom
## Multiple R-squared:  0.7133, Adjusted R-squared:  0.6723 
## F-statistic: 17.41 on 1 and 7 DF,  p-value: 0.004174
# Median imputation, done on a copy so that `a` keeps its NAs.
a2 <- a
# na.rm is spelled TRUE rather than T: `T` is an ordinary variable that can
# be reassigned, whereas TRUE is a reserved word.
wtmed <- median(a2$wt, na.rm = TRUE)
wtmed
## [1] 60
agemed <- median(a2$age, na.rm = TRUE)
agemed
## [1] 36
a2
##     wt age
## 1   55  18
## 2   45  11
## 3   30   7
## 4   60  14
## 5   50  13
## 6  100  41
## 7   75  38
## 8   75  36
## 9   65  43
## 10  NA  NA
## 11  NA  68
## 12  NA  62
# Overwrite only the missing entries of wt with the observed median.
a2$wt[is.na(a2$wt)] <- wtmed
a2
##     wt age
## 1   55  18
## 2   45  11
## 3   30   7
## 4   60  14
## 5   50  13
## 6  100  41
## 7   75  38
## 8   75  36
## 9   65  43
## 10  60  NA
## 11  60  68
## 12  60  62
# Same for age.
a2$age[is.na(a2$age)] <- agemed
a2
##     wt age
## 1   55  18
## 2   45  11
## 3   30   7
## 4   60  14
## 5   50  13
## 6  100  41
## 7   75  38
## 8   75  36
## 9   65  43
## 10  60  36
## 11  60  68
## 12  60  62
# Refit on the imputed data: all 12 rows are used now, and the age slope and
# R-squared are both lower than in the complete-case fit above.
c12 <- lm(wt ~ age, data = a2)
summary(c12)
## 
## Call:
## lm(formula = wt ~ age, data = a2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.456  -8.866  -1.849   7.737  35.009 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  47.4630     8.9308   5.315  0.00034 ***
## age           0.4275     0.2380   1.796  0.10272    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.81 on 10 degrees of freedom
## Multiple R-squared:  0.2439, Adjusted R-squared:  0.1683 
## F-statistic: 3.226 on 1 and 10 DF,  p-value: 0.1027