data("mtcars")
#summary of the data
library(skimr)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
skim(mtcars)
| Name | mtcars |
| Number of rows | 32 |
| Number of columns | 11 |
| _______________________ | |
| Column type frequency: | |
| numeric | 11 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| mpg | 0 | 1 | 20.09 | 6.03 | 10.40 | 15.43 | 19.20 | 22.80 | 33.90 | ▃▇▅▁▂ |
| cyl | 0 | 1 | 6.19 | 1.79 | 4.00 | 4.00 | 6.00 | 8.00 | 8.00 | ▆▁▃▁▇ |
| disp | 0 | 1 | 230.72 | 123.94 | 71.10 | 120.83 | 196.30 | 326.00 | 472.00 | ▇▃▃▃▂ |
| hp | 0 | 1 | 146.69 | 68.56 | 52.00 | 96.50 | 123.00 | 180.00 | 335.00 | ▇▇▆▃▁ |
| drat | 0 | 1 | 3.60 | 0.53 | 2.76 | 3.08 | 3.70 | 3.92 | 4.93 | ▇▃▇▅▁ |
| wt | 0 | 1 | 3.22 | 0.98 | 1.51 | 2.58 | 3.33 | 3.61 | 5.42 | ▃▃▇▁▂ |
| qsec | 0 | 1 | 17.85 | 1.79 | 14.50 | 16.89 | 17.71 | 18.90 | 22.90 | ▃▇▇▂▁ |
| vs | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
| am | 0 | 1 | 0.41 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
| gear | 0 | 1 | 3.69 | 0.74 | 3.00 | 3.00 | 4.00 | 4.00 | 5.00 | ▇▁▆▁▂ |
| carb | 0 | 1 | 2.81 | 1.62 | 1.00 | 2.00 | 2.00 | 4.00 | 8.00 | ▇▂▅▁▁ |
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Visualizing the histogram of the dependent variable(mpg)
# Histogram with density plot
library(ggplot2)
p1 <- ggplot(mtcars, aes(x= mpg)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#69b3a2")
p1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#visualizing the log of mpg
mtcars$log_mpg = log(mtcars$mpg)
p <- ggplot(mtcars, aes(x= log_mpg)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#69b3a2")
p
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Visualizing the histogram and density plot of the independent variables
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
p1 <- ggplot(mtcars, aes(x=cyl)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#69b3a2")
p2 <- ggplot(mtcars, aes(x=disp)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#69b3a2")
p3 <- ggplot(mtcars, aes(x=hp)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#69b3a2")
p4 <- ggplot(mtcars, aes(x=wt)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#69b3a2")
p5 <- ggplot(mtcars, aes(x= drat)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#69b3a2")
p6 <- ggplot(mtcars, aes(x= qsec)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#69b3a2")
grid.arrange(p1,p2,p3,p4,p5,p6, ncol=3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#transforming the variable hp to log_hp
mtcars$log_hp = log(mtcars$hp)
p <- ggplot(mtcars, aes(x= log_hp)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#69b3a2")
p
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
library(gridExtra)
plot1 <- ggplot(data = mtcars, aes(x=wt, y=mpg)) +
geom_point(aes(color = factor(am))) +
labs(x = "wt")
plot2 <- ggplot(data = mtcars, aes(x=hp, y=mpg)) +
geom_point(aes(color = factor(am))) +
labs(x = "hp")
grid.arrange(plot1, plot2, ncol = 2)
#scatterplot between the variables to observe the correlation
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(corrplot)
## corrplot 0.92 loaded
mtcars1 <- mtcars[, c(1:6)]
ggpairs(mtcars1)+theme_bw()
#correlation plot with numbers
library(corrplot)
library("Hmisc")
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
cor1 <- rcorr(as.matrix(mtcars1))
cor1
## mpg cyl disp hp drat wt
## mpg 1.00 -0.85 -0.85 -0.78 0.68 -0.87
## cyl -0.85 1.00 0.90 0.83 -0.70 0.78
## disp -0.85 0.90 1.00 0.79 -0.71 0.89
## hp -0.78 0.83 0.79 1.00 -0.45 0.66
## drat 0.68 -0.70 -0.71 -0.45 1.00 -0.71
## wt -0.87 0.78 0.89 0.66 -0.71 1.00
##
## n= 32
##
##
## P
## mpg cyl disp hp drat wt
## mpg 0.00 0.00 0.00 0.00 0.00
## cyl 0.00 0.00 0.00 0.00 0.00
## disp 0.00 0.00 0.00 0.00 0.00
## hp 0.00 0.00 0.00 0.01 0.00
## drat 0.00 0.00 0.00 0.01 0.00
## wt 0.00 0.00 0.00 0.00 0.00
ggcorr(mtcars1, label = TRUE, nbreaks = 4, palette = "RdGy",label_color = "white")+ theme_bw()
mtcars$trans= factor(mtcars$am, levels=c(0,1), labels=c("automatic","manual"))
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb log_mpg
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 3.044522
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 3.044522
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 3.126761
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 3.063391
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 2.928524
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 2.895912
## log_hp trans
## Mazda RX4 4.700480 manual
## Mazda RX4 Wag 4.700480 manual
## Datsun 710 4.532599 manual
## Hornet 4 Drive 4.700480 automatic
## Hornet Sportabout 5.164786 automatic
## Valiant 4.653960 automatic
summary(mtcars$trans)
## automatic manual
## 19 13
#visualizing the manual and automatic transition
library(hrbrthemes )
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
b <- mtcars %>%
ggplot( aes(x=mtcars$trans, y=mpg) ) +
geom_boxplot(fill="#69b3a2") +
theme_ipsum() +
geom_jitter(position=position_jitter(width=.1, height=0))+
xlab("am")
b
## Warning: Use of `mtcars$trans` is discouraged. Use `trans` instead.
## Use of `mtcars$trans` is discouraged. Use `trans` instead.
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
1.Normal QQ-plot
#Normal QQ-plot
par(mfrow=c(1,2))
with(mtcars,qqnorm(mpg[trans=="manual"]));with(mtcars,qqline(mpg[trans=="manual"]))
with(mtcars,qqnorm(mpg[trans=="automatic"])); with(mtcars, qqline(mpg[trans=="automatic"]))
ggplot(mtcars, aes(sample=mpg, color=trans))+stat_qq()+theme_bw()
with(mtcars, shapiro.test(mpg[trans == "manual"]))
##
## Shapiro-Wilk normality test
##
## data: mpg[trans == "manual"]
## W = 0.9458, p-value = 0.5363
with(mtcars, shapiro.test(mpg[trans == "automatic"]))
##
## Shapiro-Wilk normality test
##
## data: mpg[trans == "automatic"]
## W = 0.97677, p-value = 0.8987
with(mtcars,var(mpg[trans=="manual"]))
## [1] 38.02577
with(mtcars,var(mpg[trans=="automatic"]))
## [1] 14.6993
with(mtcars,t.test(mpg~trans,mu=0, alt="two.sided",conf=0.95,var.eq=F,paired=F))
##
## Welch Two Sample t-test
##
## data: mpg by trans
## t = -3.7671, df = 18.332, p-value = 0.001374
## alternative hypothesis: true difference in means between group automatic and group manual is not equal to 0
## 95 percent confidence interval:
## -11.280194 -3.209684
## sample estimates:
## mean in group automatic mean in group manual
## 17.14737 24.39231
mtcars_1 <- mtcars[,-c(12:15)]
head(mtcars_1)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
lm <- lm(mpg~., mtcars_1)
summary(lm)
##
## Call:
## lm(formula = mpg ~ ., data = mtcars_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4506 -1.6044 -0.1196 1.2193 4.6271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.30337 18.71788 0.657 0.5181
## cyl -0.11144 1.04502 -0.107 0.9161
## disp 0.01334 0.01786 0.747 0.4635
## hp -0.02148 0.02177 -0.987 0.3350
## drat 0.78711 1.63537 0.481 0.6353
## wt -3.71530 1.89441 -1.961 0.0633 .
## qsec 0.82104 0.73084 1.123 0.2739
## vs 0.31776 2.10451 0.151 0.8814
## am 2.52023 2.05665 1.225 0.2340
## gear 0.65541 1.49326 0.439 0.6652
## carb -0.19942 0.82875 -0.241 0.8122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared: 0.869, Adjusted R-squared: 0.8066
## F-statistic: 13.93 on 10 and 21 DF, p-value: 3.793e-07
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
step = stepAIC(lm(mpg~.,data = mtcars_1))
## Start: AIC=70.9
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - cyl 1 0.0799 147.57 68.915
## - vs 1 0.1601 147.66 68.932
## - carb 1 0.4067 147.90 68.986
## - gear 1 1.3531 148.85 69.190
## - drat 1 1.6270 149.12 69.249
## - disp 1 3.9167 151.41 69.736
## - hp 1 6.8399 154.33 70.348
## - qsec 1 8.8641 156.36 70.765
## <none> 147.49 70.898
## - am 1 10.5467 158.04 71.108
## - wt 1 27.0144 174.51 74.280
##
## Step: AIC=68.92
## mpg ~ disp + hp + drat + wt + qsec + vs + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - vs 1 0.2685 147.84 66.973
## - carb 1 0.5201 148.09 67.028
## - gear 1 1.8211 149.40 67.308
## - drat 1 1.9826 149.56 67.342
## - disp 1 3.9009 151.47 67.750
## - hp 1 7.3632 154.94 68.473
## <none> 147.57 68.915
## - qsec 1 10.0933 157.67 69.032
## - am 1 11.8359 159.41 69.384
## - wt 1 27.0280 174.60 72.297
##
## Step: AIC=66.97
## mpg ~ disp + hp + drat + wt + qsec + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - carb 1 0.6855 148.53 65.121
## - gear 1 2.1437 149.99 65.434
## - drat 1 2.2139 150.06 65.449
## - disp 1 3.6467 151.49 65.753
## - hp 1 7.1060 154.95 66.475
## <none> 147.84 66.973
## - am 1 11.5694 159.41 67.384
## - qsec 1 15.6830 163.53 68.200
## - wt 1 27.3799 175.22 70.410
##
## Step: AIC=65.12
## mpg ~ disp + hp + drat + wt + qsec + am + gear
##
## Df Sum of Sq RSS AIC
## - gear 1 1.565 150.09 63.457
## - drat 1 1.932 150.46 63.535
## <none> 148.53 65.121
## - disp 1 10.110 158.64 65.229
## - am 1 12.323 160.85 65.672
## - hp 1 14.826 163.35 66.166
## - qsec 1 26.408 174.94 68.358
## - wt 1 69.127 217.66 75.350
##
## Step: AIC=63.46
## mpg ~ disp + hp + drat + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - drat 1 3.345 153.44 62.162
## - disp 1 8.545 158.64 63.229
## <none> 150.09 63.457
## - hp 1 13.285 163.38 64.171
## - am 1 20.036 170.13 65.466
## - qsec 1 25.574 175.67 66.491
## - wt 1 67.572 217.66 73.351
##
## Step: AIC=62.16
## mpg ~ disp + hp + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - disp 1 6.629 160.07 61.515
## <none> 153.44 62.162
## - hp 1 12.572 166.01 62.682
## - qsec 1 26.470 179.91 65.255
## - am 1 32.198 185.63 66.258
## - wt 1 69.043 222.48 72.051
##
## Step: AIC=61.52
## mpg ~ hp + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - hp 1 9.219 169.29 61.307
## <none> 160.07 61.515
## - qsec 1 20.225 180.29 63.323
## - am 1 25.993 186.06 64.331
## - wt 1 78.494 238.56 72.284
##
## Step: AIC=61.31
## mpg ~ wt + qsec + am
##
## Df Sum of Sq RSS AIC
## <none> 169.29 61.307
## - am 1 26.178 195.46 63.908
## - qsec 1 109.034 278.32 75.217
## - wt 1 183.347 352.63 82.790
step
##
## Call:
## lm(formula = mpg ~ wt + qsec + am, data = mtcars_1)
##
## Coefficients:
## (Intercept) wt qsec am
## 9.618 -3.917 1.226 2.936
lm_model_1 <- lm(mpg ~ wt + qsec + am, mtcars_1)
summary(lm_model_1)
##
## Call:
## lm(formula = mpg ~ wt + qsec + am, data = mtcars_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4811 -1.5555 -0.7257 1.4110 4.6610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.6178 6.9596 1.382 0.177915
## wt -3.9165 0.7112 -5.507 6.95e-06 ***
## qsec 1.2259 0.2887 4.247 0.000216 ***
## am 2.9358 1.4109 2.081 0.046716 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.459 on 28 degrees of freedom
## Multiple R-squared: 0.8497, Adjusted R-squared: 0.8336
## F-statistic: 52.75 on 3 and 28 DF, p-value: 1.21e-11
par(mfrow=c(2,2))
plot(lm_model_1)
library(gvlma)
gvlma::gvlma(x = lm_model_1)
##
## Call:
## lm(formula = mpg ~ wt + qsec + am, data = mtcars_1)
##
## Coefficients:
## (Intercept) wt qsec am
## 9.618 -3.917 1.226 2.936
##
##
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance = 0.05
##
## Call:
## gvlma::gvlma(x = lm_model_1)
##
## Value p-value Decision
## Global Stat 11.3771 0.022637 Assumptions NOT satisfied!
## Skewness 1.5545 0.212473 Assumptions acceptable.
## Kurtosis 0.6585 0.417077 Assumptions acceptable.
## Link Function 8.9061 0.002842 Assumptions NOT satisfied!
## Heteroscedasticity 0.2580 0.611526 Assumptions acceptable.
stepback <- stepAIC(lm(mpg~.,data = mtcars_1), direction = "backward")
## Start: AIC=70.9
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - cyl 1 0.0799 147.57 68.915
## - vs 1 0.1601 147.66 68.932
## - carb 1 0.4067 147.90 68.986
## - gear 1 1.3531 148.85 69.190
## - drat 1 1.6270 149.12 69.249
## - disp 1 3.9167 151.41 69.736
## - hp 1 6.8399 154.33 70.348
## - qsec 1 8.8641 156.36 70.765
## <none> 147.49 70.898
## - am 1 10.5467 158.04 71.108
## - wt 1 27.0144 174.51 74.280
##
## Step: AIC=68.92
## mpg ~ disp + hp + drat + wt + qsec + vs + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - vs 1 0.2685 147.84 66.973
## - carb 1 0.5201 148.09 67.028
## - gear 1 1.8211 149.40 67.308
## - drat 1 1.9826 149.56 67.342
## - disp 1 3.9009 151.47 67.750
## - hp 1 7.3632 154.94 68.473
## <none> 147.57 68.915
## - qsec 1 10.0933 157.67 69.032
## - am 1 11.8359 159.41 69.384
## - wt 1 27.0280 174.60 72.297
##
## Step: AIC=66.97
## mpg ~ disp + hp + drat + wt + qsec + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - carb 1 0.6855 148.53 65.121
## - gear 1 2.1437 149.99 65.434
## - drat 1 2.2139 150.06 65.449
## - disp 1 3.6467 151.49 65.753
## - hp 1 7.1060 154.95 66.475
## <none> 147.84 66.973
## - am 1 11.5694 159.41 67.384
## - qsec 1 15.6830 163.53 68.200
## - wt 1 27.3799 175.22 70.410
##
## Step: AIC=65.12
## mpg ~ disp + hp + drat + wt + qsec + am + gear
##
## Df Sum of Sq RSS AIC
## - gear 1 1.565 150.09 63.457
## - drat 1 1.932 150.46 63.535
## <none> 148.53 65.121
## - disp 1 10.110 158.64 65.229
## - am 1 12.323 160.85 65.672
## - hp 1 14.826 163.35 66.166
## - qsec 1 26.408 174.94 68.358
## - wt 1 69.127 217.66 75.350
##
## Step: AIC=63.46
## mpg ~ disp + hp + drat + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - drat 1 3.345 153.44 62.162
## - disp 1 8.545 158.64 63.229
## <none> 150.09 63.457
## - hp 1 13.285 163.38 64.171
## - am 1 20.036 170.13 65.466
## - qsec 1 25.574 175.67 66.491
## - wt 1 67.572 217.66 73.351
##
## Step: AIC=62.16
## mpg ~ disp + hp + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - disp 1 6.629 160.07 61.515
## <none> 153.44 62.162
## - hp 1 12.572 166.01 62.682
## - qsec 1 26.470 179.91 65.255
## - am 1 32.198 185.63 66.258
## - wt 1 69.043 222.48 72.051
##
## Step: AIC=61.52
## mpg ~ hp + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - hp 1 9.219 169.29 61.307
## <none> 160.07 61.515
## - qsec 1 20.225 180.29 63.323
## - am 1 25.993 186.06 64.331
## - wt 1 78.494 238.56 72.284
##
## Step: AIC=61.31
## mpg ~ wt + qsec + am
##
## Df Sum of Sq RSS AIC
## <none> 169.29 61.307
## - am 1 26.178 195.46 63.908
## - qsec 1 109.034 278.32 75.217
## - wt 1 183.347 352.63 82.790
stepback
##
## Call:
## lm(formula = mpg ~ wt + qsec + am, data = mtcars_1)
##
## Coefficients:
## (Intercept) wt qsec am
## 9.618 -3.917 1.226 2.936
#Building a correlation matrix
cor(as.matrix(mtcars[,c(1,7,9)]))
## mpg qsec am
## mpg 1.0000000 0.4186840 0.5998324
## qsec 0.4186840 1.0000000 -0.2298609
## am 0.5998324 -0.2298609 1.0000000
#removing mpg, hp and transmittion
mtcars_2 <- mtcars[,-c(1,4,14,15)]
head(mtcars_2)
## cyl disp drat wt qsec vs am gear carb log_mpg log_hp
## Mazda RX4 6 160 3.90 2.620 16.46 0 1 4 4 3.044522 4.700480
## Mazda RX4 Wag 6 160 3.90 2.875 17.02 0 1 4 4 3.044522 4.700480
## Datsun 710 4 108 3.85 2.320 18.61 1 1 4 1 3.126761 4.532599
## Hornet 4 Drive 6 258 3.08 3.215 19.44 1 0 3 1 3.063391 4.700480
## Hornet Sportabout 8 360 3.15 3.440 17.02 0 0 3 2 2.928524 5.164786
## Valiant 6 225 2.76 3.460 20.22 1 0 3 1 2.895912 4.653960
step2 = stepAIC(lm(log_mpg~.,data = mtcars_2))
## Start: AIC=-129.33
## log_mpg ~ cyl + disp + drat + wt + qsec + vs + am + gear + carb +
## log_hp
##
## Df Sum of Sq RSS AIC
## - disp 1 0.000007 0.28268 -131.33
## - drat 1 0.000026 0.28270 -131.33
## - vs 1 0.000086 0.28276 -131.32
## - am 1 0.001172 0.28385 -131.20
## - cyl 1 0.001331 0.28401 -131.18
## - carb 1 0.005141 0.28781 -130.76
## - qsec 1 0.005775 0.28845 -130.69
## - gear 1 0.016870 0.29954 -129.48
## <none> 0.28267 -129.33
## - log_hp 1 0.032853 0.31553 -127.82
## - wt 1 0.052399 0.33507 -125.89
##
## Step: AIC=-131.33
## log_mpg ~ cyl + drat + wt + qsec + vs + am + gear + carb + log_hp
##
## Df Sum of Sq RSS AIC
## - drat 1 0.000022 0.28270 -133.33
## - vs 1 0.000086 0.28277 -133.32
## - am 1 0.001180 0.28386 -133.20
## - cyl 1 0.001573 0.28425 -133.16
## - qsec 1 0.006194 0.28887 -132.64
## - carb 1 0.008379 0.29106 -132.40
## - gear 1 0.017012 0.29969 -131.46
## <none> 0.28268 -131.33
## - log_hp 1 0.036280 0.31896 -129.47
## - wt 1 0.117956 0.40064 -122.17
##
## Step: AIC=-133.33
## log_mpg ~ cyl + wt + qsec + vs + am + gear + carb + log_hp
##
## Df Sum of Sq RSS AIC
## - vs 1 0.000090 0.28279 -135.32
## - am 1 0.001158 0.28386 -135.20
## - cyl 1 0.001700 0.28440 -135.14
## - qsec 1 0.006443 0.28915 -134.61
## - carb 1 0.008966 0.29167 -134.33
## - gear 1 0.017148 0.29985 -133.45
## <none> 0.28270 -133.33
## - log_hp 1 0.037932 0.32064 -131.30
## - wt 1 0.118148 0.40085 -124.16
##
## Step: AIC=-135.32
## log_mpg ~ cyl + wt + qsec + am + gear + carb + log_hp
##
## Df Sum of Sq RSS AIC
## - am 1 0.001321 0.28411 -137.17
## - cyl 1 0.002227 0.28502 -137.07
## - qsec 1 0.006888 0.28968 -136.55
## - carb 1 0.009003 0.29180 -136.32
## - gear 1 0.017068 0.29986 -135.45
## <none> 0.28279 -135.32
## - log_hp 1 0.039235 0.32203 -133.16
## - wt 1 0.123145 0.40594 -125.75
##
## Step: AIC=-137.17
## log_mpg ~ cyl + wt + qsec + gear + carb + log_hp
##
## Df Sum of Sq RSS AIC
## - cyl 1 0.001637 0.28575 -138.99
## - qsec 1 0.005572 0.28969 -138.55
## - carb 1 0.009031 0.29315 -138.17
## <none> 0.28411 -137.17
## - gear 1 0.023660 0.30777 -136.61
## - log_hp 1 0.043146 0.32726 -134.65
## - wt 1 0.127090 0.41120 -127.34
##
## Step: AIC=-138.99
## log_mpg ~ wt + qsec + gear + carb + log_hp
##
## Df Sum of Sq RSS AIC
## - qsec 1 0.003935 0.28969 -140.55
## - carb 1 0.008500 0.29425 -140.05
## <none> 0.28575 -138.99
## - gear 1 0.025311 0.31106 -138.27
## - log_hp 1 0.044851 0.33060 -136.32
## - wt 1 0.130582 0.41633 -128.94
##
## Step: AIC=-140.55
## log_mpg ~ wt + gear + carb + log_hp
##
## Df Sum of Sq RSS AIC
## - carb 1 0.012582 0.30227 -141.19
## <none> 0.28969 -140.55
## - gear 1 0.025830 0.31552 -139.82
## - log_hp 1 0.120610 0.41030 -131.41
## - wt 1 0.145644 0.43533 -129.52
##
## Step: AIC=-141.19
## log_mpg ~ wt + gear + log_hp
##
## Df Sum of Sq RSS AIC
## - gear 1 0.013273 0.31554 -141.81
## <none> 0.30227 -141.19
## - wt 1 0.211739 0.51401 -126.20
## - log_hp 1 0.250113 0.55238 -123.90
##
## Step: AIC=-141.81
## log_mpg ~ wt + log_hp
##
## Df Sum of Sq RSS AIC
## <none> 0.31554 -141.81
## - log_hp 1 0.24092 0.55646 -125.66
## - wt 1 0.46586 0.78140 -114.80
step2
##
## Call:
## lm(formula = log_mpg ~ wt + log_hp, data = mtcars_2)
##
## Coefficients:
## (Intercept) wt log_hp
## 4.8317 -0.1794 -0.2657
lm_model_2 <- lm(formula = log_mpg ~ wt + log_hp, data = mtcars_2)
summary(lm_model_2)
##
## Call:
## lm(formula = log_mpg ~ wt + log_hp, data = mtcars_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16296 -0.07799 -0.02210 0.06837 0.25985
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.83167 0.22198 21.766 < 2e-16 ***
## wt -0.17942 0.02742 -6.543 3.63e-07 ***
## log_hp -0.26566 0.05646 -4.706 5.75e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1043 on 29 degrees of freedom
## Multiple R-squared: 0.8852, Adjusted R-squared: 0.8773
## F-statistic: 111.8 on 2 and 29 DF, p-value: 2.338e-14
par(mfrow=c(2,2))
plot(lm_model_2)
library(gvlma)
gvlma::gvlma(x = lm_model_2)
##
## Call:
## lm(formula = log_mpg ~ wt + log_hp, data = mtcars_2)
##
## Coefficients:
## (Intercept) wt log_hp
## 4.8317 -0.1794 -0.2657
##
##
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance = 0.05
##
## Call:
## gvlma::gvlma(x = lm_model_2)
##
## Value p-value Decision
## Global Stat 3.11370 0.5390 Assumptions acceptable.
## Skewness 1.93334 0.1644 Assumptions acceptable.
## Kurtosis 0.06221 0.8030 Assumptions acceptable.
## Link Function 0.62940 0.4276 Assumptions acceptable.
## Heteroscedasticity 0.48875 0.4845 Assumptions acceptable.
#Building a correlation matrix
cor(as.matrix(mtcars_2[,c(1,6,11)]))
## cyl vs log_hp
## cyl 1.0000000 -0.8108118 0.8806646
## vs -0.8108118 1.0000000 -0.7617319
## log_hp 0.8806646 -0.7617319 1.0000000