Preparation

Packages

# Attach psych, which supplies describe(); unlike require(), library() stops
# with an error when the package is not installed.
library(psych)

Data

Data Loading

# Load the mtcars data frame into the workspace.
data("mtcars")

Analysis

EDA

Summary of Dataset

# Per-column descriptive statistics (n, mean, sd, median, skew, kurtosis, se, ...).
psych::describe(mtcars)
##      vars  n   mean     sd median trimmed    mad   min    max  range  skew
## mpg     1 32  20.09   6.03  19.20   19.70   5.41 10.40  33.90  23.50  0.61
## cyl     2 32   6.19   1.79   6.00    6.23   2.97  4.00   8.00   4.00 -0.17
## disp    3 32 230.72 123.94 196.30  222.52 140.48 71.10 472.00 400.90  0.38
## hp      4 32 146.69  68.56 123.00  141.19  77.10 52.00 335.00 283.00  0.73
## drat    5 32   3.60   0.53   3.70    3.58   0.70  2.76   4.93   2.17  0.27
## wt      6 32   3.22   0.98   3.33    3.15   0.77  1.51   5.42   3.91  0.42
## qsec    7 32  17.85   1.79  17.71   17.83   1.42 14.50  22.90   8.40  0.37
## vs      8 32   0.44   0.50   0.00    0.42   0.00  0.00   1.00   1.00  0.24
## am      9 32   0.41   0.50   0.00    0.38   0.00  0.00   1.00   1.00  0.36
## gear   10 32   3.69   0.74   4.00    3.62   1.48  3.00   5.00   2.00  0.53
## carb   11 32   2.81   1.62   2.00    2.65   1.48  1.00   8.00   7.00  1.05
##      kurtosis    se
## mpg     -0.37  1.07
## cyl     -1.76  0.32
## disp    -1.21 21.91
## hp      -0.14 12.12
## drat    -0.71  0.09
## wt      -0.02  0.17
## qsec     0.34  0.32
## vs      -2.00  0.09
## am      -1.92  0.09
## gear    -1.07  0.13
## carb     1.26  0.29

Histogram of Individual Variable

# Distribution of mpg: histogram, then kernel density estimate, each with the
# sample mean marked by a thick dashed red vertical line.
mpg_mean <- mean(mtcars$mpg)
hist(mtcars$mpg)
abline(v = mpg_mean, lwd = 3, lty = 2, col = "red")

mpg_density <- density(mtcars$mpg)
plot(mpg_density)
abline(v = mpg_mean, lwd = 3, lty = 2, col = "red")

# The same two views for wt.
wt_mean <- mean(mtcars$wt)
hist(mtcars$wt)
abline(v = wt_mean, lwd = 3, lty = 2, col = "red")

wt_density <- density(mtcars$wt)
plot(wt_density)
abline(v = wt_mean, lwd = 3, lty = 2, col = "red")

Correlation

Correlation Table

# Pearson correlation between mpg and wt.
with(mtcars, cor(mpg, wt))
## [1] -0.8676594

#plot(mtcars)

# Fit mpg on wt and overlay the least-squares line on the scatter plot.
reg <- lm(mpg ~ wt, data = mtcars)
coeff <- coefficients(reg)
plot(mtcars$wt, mtcars$mpg)
# The line style is set with `lty`; passing `type` to abline() only triggers
# an "obsolete graphical parameter" warning.
abline(reg, col = "red", lty = "dotted")

Regression

Estimating coefficient

# Simple linear regression of mpg on wt.
m0 <- lm(formula = mpg ~ wt, data = mtcars)
# Print residual quantiles, the coefficient table and the overall fit statistics.
summary(m0)
## 
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5432 -2.3647 -0.1252  1.4096  6.8727 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  37.2851     1.8776  19.858  < 2e-16 ***
## wt           -5.3445     0.5591  -9.559 1.29e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared:  0.7528, Adjusted R-squared:  0.7446 
## F-statistic: 91.38 on 1 and 30 DF,  p-value: 1.294e-10

Plot

# Draw the diagnostic plots for m0 in a 2 x 2 grid.
# par() returns the settings it replaces, so keep them and restore them
# afterwards instead of assuming the layout was 1 x 1 beforehand.
old_par <- par(mfrow = c(2, 2))
plot(m0)

par(old_par)

Prediction

# Linear model in slope-intercept form: mpg = coff * wt + intercept.
# The numbers are typed in by hand from the summary(m0) coefficient table.
# NOTE(review): the slope is cut to two decimals (the table shows -5.3445).
intercept <- 37.2851
coff <- -5.34

If there is a new car whose weight is 2.567, what would the mpg of that car be?

# Weight of the hypothetical new car.
new_car <- 2.567
# Evaluate mpg = slope * wt + intercept with the fitted coefficients of `reg`
# rather than hand-rounded constants, so the estimate lies on the red line.
fit_coef <- coef(reg)
mpg_new1 <- fit_coef[["wt"]] * new_car + fit_coef[["(Intercept)"]]
mpg_new1
## [1] 23.56587
plot(mtcars$wt, mtcars$mpg)
# Line style is set with `lty`; `type` only triggers an "obsolete graphical
# parameter" warning from abline().
abline(reg, col = "red", lty = "dotted")
# Mark the new car's weight (vertical) and its predicted mpg (horizontal).
abline(v = new_car, col = "blue", lty = "dotted")
abline(h = mpg_new1, col = "yellow", lty = "dotted")

# Calculate the estimated mpg of a new car whose weight is 7.8

# Weight of the second hypothetical car.
new_car <- 7.8
# Evaluate mpg = slope * wt + intercept with the fitted coefficients of `reg`
# rather than hand-rounded constants, so the estimate lies on the red line.
fit_coef <- coef(reg)
mpg_new1 <- fit_coef[["wt"]] * new_car + fit_coef[["(Intercept)"]]
mpg_new1
## [1] -4.401752
plot(mtcars$wt, mtcars$mpg)
# Line style is set with `lty`; `type` only triggers an "obsolete graphical
# parameter" warning from abline().
abline(reg, col = "red", lty = "dotted")
# Mark the new car's weight (vertical) and its predicted mpg (horizontal).
abline(v = new_car, col = "blue", lty = "dotted")
abline(h = mpg_new1, col = "yellow", lty = "dotted")