library(datasets)
library(ggplot2)
# Load a fresh copy of the built-in mtcars data set and recode the
# transmission column `am` as a factor; the labels are applied to the sorted
# original values in order (first -> "Automatic", second -> "Manual").
data("mtcars")
mtcars[["am"]] <- factor(mtcars[["am"]], labels = c("Automatic", "Manual"))
# Let's run a t-test to check whether the mean mpg differs significantly
# between the two transmission types.
# Two-sample t-test of mpg grouped by transmission (`am`). The result object
# is kept in `ttest` and its summary is printed.
ttest <- stats::t.test(mpg ~ am, data = mtcars)
print(ttest)
##
## Welch Two Sample t-test
##
## data: mpg by am
## t = -3.7671, df = 18.332, p-value = 0.001374
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11.280194 -3.209684
## sample estimates:
## mean in group Automatic mean in group Manual
## 17.14737 24.39231
# According to the t-test, there is a significant difference in mean mpg
# between the transmission types. Let's validate this by plotting.
# Box plot of mpg for each transmission type, one coloured box per group.
plt <- ggplot(
  data = mtcars,
  mapping = aes(x = am, y = mpg, group = am, color = am)
) +
  geom_boxplot()
print(plt)
# To understand which variables most affect the linear model, we will use the
# step function.
# Start from the linear model of mpg on every other column and let step()
# choose the terms to keep; trace = 0 suppresses the per-step log. The
# selected model is stored in `stepmodel` and printed.
stepmodel <- step(
  lm(mpg ~ ., data = mtcars),
  trace = 0
)
print(stepmodel)
##
## Call:
## lm(formula = mpg ~ wt + qsec + am, data = mtcars)
##
## Coefficients:
## (Intercept) wt qsec amManual
## 9.618 -3.917 1.226 2.936
# It seems that wt, qsec and am are the variables that affect the linear model.
# We will fit lm again using the variables wt and qsec, controlled by am.
# Fit mpg on the am:wt and am:qsec interaction terms only (no separate am
# main-effect term appears in the formula), then print the fitted model.
model <- lm(formula = mpg ~ am:(wt + qsec), data = mtcars)
print(model)
##
## Call:
## lm(formula = mpg ~ am:(wt + qsec), data = mtcars)
##
## Coefficients:
## (Intercept) amAutomatic:wt amManual:wt amAutomatic:qsec
## 13.9692 -3.1759 -6.0992 0.8338
## amManual:qsec
## 1.4464
# Plot mpg against each continuous predictor, split by transmission type.
# Every row is an independent car, so the observations are drawn as points
# (joining them with a line in x order would suggest a sequence that does not
# exist). A per-group least-squares line is overlaid to show how the slope
# differs between transmission types.
plt <- ggplot(mtcars, aes(wt, mpg, group = am, color = am)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE)
print(plt)

plt <- ggplot(mtcars, aes(qsec, mpg, group = am, color = am)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE)
print(plt)