Assignment 1

Find a data set of which you can fit multiple linear regression and interpret your results

#install.packages("readxl")
library(readxl)
data <- read_excel("D:/Emerance - personal stuffs/Big data/R Programming/Last Assignment/pathogen_dataset.xlsx")

str(data)
## tibble [3,040 Ă— 31] (S3: tbl_df/tbl/data.frame)
##  $ Var1         : num [1:3040] 63.5 46.1 60.3 49.3 52.1 ...
##  $ Var2         : num [1:3040] 37 34.2 64.9 54.4 47.3 ...
##  $ Var3         : num [1:3040] 37.5 58.1 46.7 30.4 60.2 ...
##  $ Var4         : num [1:3040] 57.2 36.5 62.3 52.9 51.4 ...
##  $ Var5         : num [1:3040] 59.7 59.5 41.5 75 35.1 ...
##  $ Var6         : num [1:3040] 64.2 54.9 53.3 64.9 53.9 ...
##  $ Var7         : num [1:3040] 46.5 54.2 61.3 37.9 52.3 ...
##  $ Var8         : num [1:3040] 65.2 51.3 72.6 51.3 71.7 ...
##  $ Var9         : num [1:3040] 54.9 37.9 32.2 55 41.9 ...
##  $ Var10        : num [1:3040] 38.9 59.7 44.4 60.2 55.8 ...
##  $ Var11        : num [1:3040] 54.5 24.6 49.2 47.5 47.9 ...
##  $ Var12        : num [1:3040] 30.1 43.9 44.4 51.3 49.8 ...
##  $ Var13        : num [1:3040] 28.7 50.3 49.3 68.6 47.4 ...
##  $ Var14        : num [1:3040] 33.3 47.2 31.5 52.7 62.9 ...
##  $ Var15        : num [1:3040] 48.8 42.6 48.8 62.8 41.7 ...
##  $ Var16        : num [1:3040] 49.4 41.1 56.4 47.1 49.6 ...
##  $ Var17        : num [1:3040] 58.3 51 51.5 49.5 62.3 ...
##  $ Var18        : num [1:3040] 40.7 39.7 58.4 57.3 29.1 ...
##  $ Var19        : num [1:3040] 53.2 57.8 47.9 56 53.1 ...
##  $ Var20        : num [1:3040] 50.5 51.9 41.1 53 46.7 ...
##  $ ID           : num [1:3040] 1782 1860 2878 2297 1492 ...
##  $ Year         : num [1:3040] 2013 2011 2023 2020 2019 ...
##  $ Sex          : chr [1:3040] "Female" "Male" "Female" "Male" ...
##  $ Education    : chr [1:3040] "Tertiary" "None" "Primary" "Secondary" ...
##  $ WaterSource  : chr [1:3040] "Borehole" "Borehole" "Tap" "Tap" ...
##  $ BoiledWater  : chr [1:3040] "No" "No" "No" "No" ...
##  $ HandWashing  : chr [1:3040] "No" "Yes" "Yes" "No" ...
##  $ ToiletType   : chr [1:3040] "Pit Latrine" "Pit Latrine" "Flush" "Pit Latrine" ...
##  $ Age          : num [1:3040] 23 59 21 26 28 64 39 76 19 19 ...
##  $ Site         : chr [1:3040] "Hospital B" "Hospital B" "Hospital A" "Clinic C" ...
##  $ bact_isolated: chr [1:3040] "None" "E.coli" "None" "E.coli" ...
head(data)
## # A tibble: 6 Ă— 31
##    Var1  Var2  Var3  Var4  Var5  Var6  Var7  Var8  Var9 Var10 Var11 Var12 Var13
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  63.5  37.0  37.5  57.2  59.7  64.2  46.5  65.2  54.9  38.9  54.5  30.1  28.7
## 2  46.1  34.2  58.1  36.5  59.5  54.9  54.2  51.3  37.9  59.7  24.6  43.9  50.3
## 3  60.3  64.9  46.7  62.3  41.5  53.3  61.3  72.6  32.2  44.4  49.2  44.4  49.3
## 4  49.3  54.4  30.4  52.9  75.0  64.9  37.9  51.3  55.0  60.2  47.5  51.3  68.6
## 5  52.1  47.3  60.2  51.4  35.1  53.9  52.3  71.7  41.9  55.8  47.9  49.8  47.4
## 6  44.9  36.5  55.0  55.2  66.7  57.9  60.0  57.2  30.8  37.9  39.3  44.6  56.0
## # ℹ 18 more variables: Var14 <dbl>, Var15 <dbl>, Var16 <dbl>, Var17 <dbl>,
## #   Var18 <dbl>, Var19 <dbl>, Var20 <dbl>, ID <dbl>, Year <dbl>, Sex <chr>,
## #   Education <chr>, WaterSource <chr>, BoiledWater <chr>, HandWashing <chr>,
## #   ToiletType <chr>, Age <dbl>, Site <chr>, bact_isolated <chr>
#Check for missing values
colSums(is.na(data))
##          Var1          Var2          Var3          Var4          Var5 
##             0             0             0             0           153 
##          Var6          Var7          Var8          Var9         Var10 
##             0             0             0             0             0 
##         Var11         Var12         Var13         Var14         Var15 
##             0             0             0             0             0 
##         Var16         Var17         Var18         Var19         Var20 
##             0             0             0             0             0 
##            ID          Year           Sex     Education   WaterSource 
##             0             0            50            30             0 
##   BoiledWater   HandWashing    ToiletType           Age          Site 
##             0             0             0             0             0 
## bact_isolated 
##             0
#Using Var1 as the dependent variable:
model <- lm(Var1 ~ Var2 + Var3 + Var4 + Var5 + Age + Year,
            data = data)
summary(model)
## 
## Call:
## lm(formula = Var1 ~ Var2 + Var3 + Var4 + Var5 + Age + Year, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.642  -7.496  -1.017   5.782 115.823 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 149.52032  118.71592   1.259   0.2080  
## Var2          0.01899    0.02630   0.722   0.4704  
## Var3          0.02203    0.02689   0.819   0.4128  
## Var4         -0.04848    0.02666  -1.819   0.0691 .
## Var5          0.04581    0.02662   1.721   0.0853 .
## Age           0.01504    0.01158   1.298   0.1943  
## Year         -0.05010    0.05882  -0.852   0.3944  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.36 on 2880 degrees of freedom
##   (153 observations deleted due to missingness)
## Multiple R-squared:  0.00353,    Adjusted R-squared:  0.001454 
## F-statistic:   1.7 on 6 and 2880 DF,  p-value: 0.1169
#Include categorical variables
model2 <- lm(Var1 ~ Var2 + Var3 + Age + Year +
             Sex + Education + WaterSource +
             BoiledWater + HandWashing +
             ToiletType + Site,
             data = data)

summary(model2)
## 
## Call:
## lm(formula = Var1 ~ Var2 + Var3 + Age + Year + Sex + Education + 
##     WaterSource + BoiledWater + HandWashing + ToiletType + Site, 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.373  -7.454  -0.755   6.002 116.185 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)   
## (Intercept)           148.623267 112.159642   1.325  0.18524   
## Var2                    0.031090   0.025089   1.239  0.21538   
## Var3                    0.019473   0.025527   0.763  0.44563   
## Age                     0.009809   0.011031   0.889  0.37397   
## Year                   -0.049384   0.055584  -0.888  0.37437   
## SexMale                 0.271239   0.507234   0.535  0.59287   
## EducationPrimary        0.796523   0.709823   1.122  0.26189   
## EducationSecondary      0.957072   0.724872   1.320  0.18683   
## EducationTertiary       0.884035   0.715501   1.236  0.21673   
## WaterSourceRiver       -2.274560   0.725014  -3.137  0.00172 **
## WaterSourceTap         -1.970834   0.710251  -2.775  0.00556 **
## WaterSourceWell        -2.173780   0.722993  -3.007  0.00266 **
## BoiledWaterYes         -0.483528   0.507197  -0.953  0.34050   
## HandWashingYes         -0.132203   0.507326  -0.261  0.79443   
## ToiletTypeNone         -0.036528   0.622172  -0.059  0.95319   
## ToiletTypePit Latrine  -0.294781   0.618160  -0.477  0.63349   
## SiteHospital A          0.832230   0.625417   1.331  0.18340   
## SiteHospital B         -0.419027   0.620120  -0.676  0.49927   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.77 on 2942 degrees of freedom
##   (80 observations deleted due to missingness)
## Multiple R-squared:  0.008628,   Adjusted R-squared:  0.0029 
## F-statistic: 1.506 on 17 and 2942 DF,  p-value: 0.08286
#Check regression assumptions:Residual plots
par(mfrow = c(2,2))
plot(model)

#Check multicollinearity
#install.packages("car")
library(car)
## Loading required package: carData
vif(model)
##     Var2     Var3     Var4     Var5      Age     Year 
## 1.002086 1.002513 1.001141 1.001107 1.002167 1.000534

It’s acceptable since the VIF <5

#Obtain predicted values
predicted_values <- predict(model)

head(predicted_values)
##        1        2        3        4        5        6 
## 50.50058 52.53551 49.62061 51.27420 50.12604 52.06168
#Obtain residuals
residuals_model <- residuals(model)

head(residuals_model)
##         1         2         3         4         5         6 
## 13.027259 -6.402198 10.655525 -1.991431  2.014692 -7.188180
summary(model)
## 
## Call:
## lm(formula = Var1 ~ Var2 + Var3 + Var4 + Var5 + Age + Year, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.642  -7.496  -1.017   5.782 115.823 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 149.52032  118.71592   1.259   0.2080  
## Var2          0.01899    0.02630   0.722   0.4704  
## Var3          0.02203    0.02689   0.819   0.4128  
## Var4         -0.04848    0.02666  -1.819   0.0691 .
## Var5          0.04581    0.02662   1.721   0.0853 .
## Age           0.01504    0.01158   1.298   0.1943  
## Year         -0.05010    0.05882  -0.852   0.3944  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.36 on 2880 degrees of freedom
##   (153 observations deleted due to missingness)
## Multiple R-squared:  0.00353,    Adjusted R-squared:  0.001454 
## F-statistic:   1.7 on 6 and 2880 DF,  p-value: 0.1169

The interpretation

The multiple linear regression model shows a very weak relationship between Var1 and the independent variables (Var2, Var3, Var4, Var5, Age, and Year). The model explains only 0.35% of the variation in Var1, and the overall model is not statistically significant (p = 0.1169). None of the predictors are statistically significant at the 5% level, although Var4 and Var5 show weak (marginal) effects. Therefore, these variables are not strong predictors of Var1 in this dataset.

Assignment 2 : Read about variable selection method

Variable selection is the process of choosing the most important independent variables (predictors) in a regression model.

Main Variable Selection Methods:

data(mtcars)
# Full model (all variables)
full_model <- lm(mpg ~ ., data = mtcars)
summary(full_model)
## 
## Call:
## lm(formula = mpg ~ ., data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4506 -1.6044 -0.1196  1.2193  4.6271 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 12.30337   18.71788   0.657   0.5181  
## cyl         -0.11144    1.04502  -0.107   0.9161  
## disp         0.01334    0.01786   0.747   0.4635  
## hp          -0.02148    0.02177  -0.987   0.3350  
## drat         0.78711    1.63537   0.481   0.6353  
## wt          -3.71530    1.89441  -1.961   0.0633 .
## qsec         0.82104    0.73084   1.123   0.2739  
## vs           0.31776    2.10451   0.151   0.8814  
## am           2.52023    2.05665   1.225   0.2340  
## gear         0.65541    1.49326   0.439   0.6652  
## carb        -0.19942    0.82875  -0.241   0.8122  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared:  0.869,  Adjusted R-squared:  0.8066 
## F-statistic: 13.93 on 10 and 21 DF,  p-value: 3.793e-07
#Backward Elimination
step_backward <- step(full_model, direction = "backward")
## Start:  AIC=70.9
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb
## 
##        Df Sum of Sq    RSS    AIC
## - cyl   1    0.0799 147.57 68.915
## - vs    1    0.1601 147.66 68.932
## - carb  1    0.4067 147.90 68.986
## - gear  1    1.3531 148.85 69.190
## - drat  1    1.6270 149.12 69.249
## - disp  1    3.9167 151.41 69.736
## - hp    1    6.8399 154.33 70.348
## - qsec  1    8.8641 156.36 70.765
## <none>              147.49 70.898
## - am    1   10.5467 158.04 71.108
## - wt    1   27.0144 174.51 74.280
## 
## Step:  AIC=68.92
## mpg ~ disp + hp + drat + wt + qsec + vs + am + gear + carb
## 
##        Df Sum of Sq    RSS    AIC
## - vs    1    0.2685 147.84 66.973
## - carb  1    0.5201 148.09 67.028
## - gear  1    1.8211 149.40 67.308
## - drat  1    1.9826 149.56 67.342
## - disp  1    3.9009 151.47 67.750
## - hp    1    7.3632 154.94 68.473
## <none>              147.57 68.915
## - qsec  1   10.0933 157.67 69.032
## - am    1   11.8359 159.41 69.384
## - wt    1   27.0280 174.60 72.297
## 
## Step:  AIC=66.97
## mpg ~ disp + hp + drat + wt + qsec + am + gear + carb
## 
##        Df Sum of Sq    RSS    AIC
## - carb  1    0.6855 148.53 65.121
## - gear  1    2.1437 149.99 65.434
## - drat  1    2.2139 150.06 65.449
## - disp  1    3.6467 151.49 65.753
## - hp    1    7.1060 154.95 66.475
## <none>              147.84 66.973
## - am    1   11.5694 159.41 67.384
## - qsec  1   15.6830 163.53 68.200
## - wt    1   27.3799 175.22 70.410
## 
## Step:  AIC=65.12
## mpg ~ disp + hp + drat + wt + qsec + am + gear
## 
##        Df Sum of Sq    RSS    AIC
## - gear  1     1.565 150.09 63.457
## - drat  1     1.932 150.46 63.535
## <none>              148.53 65.121
## - disp  1    10.110 158.64 65.229
## - am    1    12.323 160.85 65.672
## - hp    1    14.826 163.35 66.166
## - qsec  1    26.408 174.94 68.358
## - wt    1    69.127 217.66 75.350
## 
## Step:  AIC=63.46
## mpg ~ disp + hp + drat + wt + qsec + am
## 
##        Df Sum of Sq    RSS    AIC
## - drat  1     3.345 153.44 62.162
## - disp  1     8.545 158.64 63.229
## <none>              150.09 63.457
## - hp    1    13.285 163.38 64.171
## - am    1    20.036 170.13 65.466
## - qsec  1    25.574 175.67 66.491
## - wt    1    67.572 217.66 73.351
## 
## Step:  AIC=62.16
## mpg ~ disp + hp + wt + qsec + am
## 
##        Df Sum of Sq    RSS    AIC
## - disp  1     6.629 160.07 61.515
## <none>              153.44 62.162
## - hp    1    12.572 166.01 62.682
## - qsec  1    26.470 179.91 65.255
## - am    1    32.198 185.63 66.258
## - wt    1    69.043 222.48 72.051
## 
## Step:  AIC=61.52
## mpg ~ hp + wt + qsec + am
## 
##        Df Sum of Sq    RSS    AIC
## - hp    1     9.219 169.29 61.307
## <none>              160.07 61.515
## - qsec  1    20.225 180.29 63.323
## - am    1    25.993 186.06 64.331
## - wt    1    78.494 238.56 72.284
## 
## Step:  AIC=61.31
## mpg ~ wt + qsec + am
## 
##        Df Sum of Sq    RSS    AIC
## <none>              169.29 61.307
## - am    1    26.178 195.46 63.908
## - qsec  1   109.034 278.32 75.217
## - wt    1   183.347 352.63 82.790
summary(step_backward)
## 
## Call:
## lm(formula = mpg ~ wt + qsec + am, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4811 -1.5555 -0.7257  1.4110  4.6610 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.6178     6.9596   1.382 0.177915    
## wt           -3.9165     0.7112  -5.507 6.95e-06 ***
## qsec          1.2259     0.2887   4.247 0.000216 ***
## am            2.9358     1.4109   2.081 0.046716 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.459 on 28 degrees of freedom
## Multiple R-squared:  0.8497, Adjusted R-squared:  0.8336 
## F-statistic: 52.75 on 3 and 28 DF,  p-value: 1.21e-11
#Forward Selection
null_model <- lm(mpg ~ 1, data = mtcars)

full_model <- lm(mpg ~ ., data = mtcars)

step_forward <- step(null_model,
                     scope = list(lower = null_model, upper = full_model),
                     direction = "forward")
## Start:  AIC=115.94
## mpg ~ 1
## 
##        Df Sum of Sq     RSS     AIC
## + wt    1    847.73  278.32  73.217
## + cyl   1    817.71  308.33  76.494
## + disp  1    808.89  317.16  77.397
## + hp    1    678.37  447.67  88.427
## + drat  1    522.48  603.57  97.988
## + vs    1    496.53  629.52  99.335
## + am    1    405.15  720.90 103.672
## + carb  1    341.78  784.27 106.369
## + gear  1    259.75  866.30 109.552
## + qsec  1    197.39  928.66 111.776
## <none>              1126.05 115.943
## 
## Step:  AIC=73.22
## mpg ~ wt
## 
##        Df Sum of Sq    RSS    AIC
## + cyl   1    87.150 191.17 63.198
## + hp    1    83.274 195.05 63.840
## + qsec  1    82.858 195.46 63.908
## + vs    1    54.228 224.09 68.283
## + carb  1    44.602 233.72 69.628
## + disp  1    31.639 246.68 71.356
## <none>              278.32 73.217
## + drat  1     9.081 269.24 74.156
## + gear  1     1.137 277.19 75.086
## + am    1     0.002 278.32 75.217
## 
## Step:  AIC=63.2
## mpg ~ wt + cyl
## 
##        Df Sum of Sq    RSS    AIC
## + hp    1   14.5514 176.62 62.665
## + carb  1   13.7724 177.40 62.805
## <none>              191.17 63.198
## + qsec  1   10.5674 180.60 63.378
## + gear  1    3.0281 188.14 64.687
## + disp  1    2.6796 188.49 64.746
## + vs    1    0.7059 190.47 65.080
## + am    1    0.1249 191.05 65.177
## + drat  1    0.0010 191.17 65.198
## 
## Step:  AIC=62.66
## mpg ~ wt + cyl + hp
## 
##        Df Sum of Sq    RSS    AIC
## <none>              176.62 62.665
## + am    1    6.6228 170.00 63.442
## + disp  1    6.1762 170.44 63.526
## + carb  1    2.5187 174.10 64.205
## + drat  1    2.2453 174.38 64.255
## + qsec  1    1.4010 175.22 64.410
## + gear  1    0.8558 175.76 64.509
## + vs    1    0.0599 176.56 64.654
summary(step_forward)
## 
## Call:
## lm(formula = mpg ~ wt + cyl + hp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9290 -1.5598 -0.5311  1.1850  5.8986 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 38.75179    1.78686  21.687  < 2e-16 ***
## wt          -3.16697    0.74058  -4.276 0.000199 ***
## cyl         -0.94162    0.55092  -1.709 0.098480 .  
## hp          -0.01804    0.01188  -1.519 0.140015    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.512 on 28 degrees of freedom
## Multiple R-squared:  0.8431, Adjusted R-squared:  0.8263 
## F-statistic: 50.17 on 3 and 28 DF,  p-value: 2.184e-11
#Stepwise Selection
stepwise_model <- step(null_model,
                       scope = list(lower = null_model, upper = full_model),
                       direction = "both")
## Start:  AIC=115.94
## mpg ~ 1
## 
##        Df Sum of Sq     RSS     AIC
## + wt    1    847.73  278.32  73.217
## + cyl   1    817.71  308.33  76.494
## + disp  1    808.89  317.16  77.397
## + hp    1    678.37  447.67  88.427
## + drat  1    522.48  603.57  97.988
## + vs    1    496.53  629.52  99.335
## + am    1    405.15  720.90 103.672
## + carb  1    341.78  784.27 106.369
## + gear  1    259.75  866.30 109.552
## + qsec  1    197.39  928.66 111.776
## <none>              1126.05 115.943
## 
## Step:  AIC=73.22
## mpg ~ wt
## 
##        Df Sum of Sq     RSS     AIC
## + cyl   1     87.15  191.17  63.198
## + hp    1     83.27  195.05  63.840
## + qsec  1     82.86  195.46  63.908
## + vs    1     54.23  224.09  68.283
## + carb  1     44.60  233.72  69.628
## + disp  1     31.64  246.68  71.356
## <none>               278.32  73.217
## + drat  1      9.08  269.24  74.156
## + gear  1      1.14  277.19  75.086
## + am    1      0.00  278.32  75.217
## - wt    1    847.73 1126.05 115.943
## 
## Step:  AIC=63.2
## mpg ~ wt + cyl
## 
##        Df Sum of Sq    RSS    AIC
## + hp    1    14.551 176.62 62.665
## + carb  1    13.772 177.40 62.805
## <none>              191.17 63.198
## + qsec  1    10.567 180.60 63.378
## + gear  1     3.028 188.14 64.687
## + disp  1     2.680 188.49 64.746
## + vs    1     0.706 190.47 65.080
## + am    1     0.125 191.05 65.177
## + drat  1     0.001 191.17 65.198
## - cyl   1    87.150 278.32 73.217
## - wt    1   117.162 308.33 76.494
## 
## Step:  AIC=62.66
## mpg ~ wt + cyl + hp
## 
##        Df Sum of Sq    RSS    AIC
## <none>              176.62 62.665
## - hp    1    14.551 191.17 63.198
## + am    1     6.623 170.00 63.442
## + disp  1     6.176 170.44 63.526
## - cyl   1    18.427 195.05 63.840
## + carb  1     2.519 174.10 64.205
## + drat  1     2.245 174.38 64.255
## + qsec  1     1.401 175.22 64.410
## + gear  1     0.856 175.76 64.509
## + vs    1     0.060 176.56 64.654
## - wt    1   115.354 291.98 76.750
summary(stepwise_model)
## 
## Call:
## lm(formula = mpg ~ wt + cyl + hp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9290 -1.5598 -0.5311  1.1850  5.8986 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 38.75179    1.78686  21.687  < 2e-16 ***
## wt          -3.16697    0.74058  -4.276 0.000199 ***
## cyl         -0.94162    0.55092  -1.709 0.098480 .  
## hp          -0.01804    0.01188  -1.519 0.140015    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.512 on 28 degrees of freedom
## Multiple R-squared:  0.8431, Adjusted R-squared:  0.8263 
## F-statistic: 50.17 on 3 and 28 DF,  p-value: 2.184e-11

##Conclusion: Variable selection helps build a better regression model by identifying the most important predictors while removing irrelevant ones. Stepwise selection is the most commonly used method in R because it balances accuracy and simplicity.