Idriss .S
12 April 2022
This presentation studies mtcars, a data set that ships with R.
We model fuel economy in miles per gallon (mpg)
as a function of the car's weight (wt).
It covers:
This work could be extended (if more slides were allowed) by:
# Set the default knitr chunk option `echo` to FALSE for the chunks that follow.
knitr::opts_chunk$set(echo = FALSE)
library(ggplot2)
library(dplyr)
library(plotly)
library(radiant.data)## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
library(tidymodels)
# Work on a copy so the built-in mtcars data set itself is left untouched.
my_mtcars <- mtcars
# Recode the transmission flag into readable labels in one vectorised step
# (0 -> "auto", 1 -> "manual"). The test is made against the original numeric
# column: recoding in two successive assignments would turn `am` into a
# character column after the first one, leaving the second comparison
# (`== 1`) to work only through implicit coercion to "1".
my_mtcars$am <- ifelse(mtcars$am == 0, "auto", "manual")
# Simple linear regression of fuel economy (mpg) on weight (wt).
# The piped data frame is bound to `data` explicitly through the `.`
# placeholder, so the call recorded in the fit stays
# `lm(formula = mpg ~ wt, data = .)`.
my_fit <- my_mtcars %>%
  lm(formula = mpg ~ wt, data = .)

# Print the coefficient table and goodness-of-fit statistics.
summary(my_fit)
##
## Call:
## lm(formula = mpg ~ wt, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5432 -2.3647 -0.1252 1.4096 6.8727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
## wt -5.3445 0.5591 -9.559 1.29e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
## F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
# Predicted mpg from the fitted model, evaluated at every observed weight.
mpg_pred <- predict(
  my_fit,
  newdata = data.frame(wt = my_mtcars$wt)
)

# Building the prediction data frame (weight vs. fitted mpg), used later to
# draw the regression line.
predDF <- data.frame(
  wt = my_mtcars$wt,
  mpg = mpg_pred
)

# Side-by-side table of predicted values, observed values and residuals.
data.frame(
  wt = my_mtcars$wt,
  mpg_predicted = mpg_pred,
  mpg_observ = my_mtcars$mpg,
  residuals = residuals(my_fit)
)
## wt mpg_predicted mpg_observ residuals
## 1 2.620 23.282611 21.0 -2.2826106
## 2 2.875 21.919770 21.0 -0.9197704
## 3 2.320 24.885952 22.8 -2.0859521
## 4 3.215 20.102650 21.4 1.2973499
## 5 3.440 18.900144 18.7 -0.2001440
## 6 3.460 18.793255 18.1 -0.6932545
## 7 3.570 18.205363 14.3 -3.9053627
## 8 3.190 20.236262 24.4 4.1637381
## 9 3.150 20.450041 22.8 2.3499593
## 10 3.440 18.900144 19.2 0.2998560
## 11 3.440 18.900144 17.8 -1.1001440
## 12 4.070 15.533127 16.4 0.8668731
## 13 3.730 17.350247 17.3 -0.0502472
## 14 3.780 17.083024 15.2 -1.8830236
## 15 5.250 9.226650 10.4 1.1733496
## 16 5.424 8.296712 10.4 2.1032876
## 17 5.345 8.718926 14.7 5.9810744
## 18 2.200 25.527289 32.4 6.8727113
## 19 1.615 28.653805 30.4 1.7461954
## 20 1.835 27.478021 33.9 6.4219792
## 21 2.465 24.111004 21.5 -2.6110037
## 22 3.520 18.472586 15.5 -2.9725862
## 23 3.435 18.926866 15.2 -3.7268663
## 24 3.840 16.762355 13.3 -3.4623553
## 25 3.845 16.735633 19.2 2.4643670
## 26 1.935 26.943574 27.3 0.3564263
## 27 2.140 25.847957 26.0 0.1520430
## 28 1.513 29.198941 30.4 1.2010593
## 29 3.170 20.343151 15.8 -4.5431513
## 30 2.770 22.480940 19.7 -2.7809399
## 31 3.570 18.205363 15.0 -3.2053627
## 32 2.780 22.427495 21.4 -1.0274952
# Legend settings handed to plotly's layout(legend = my_l).
# The legend heading has to live under the `title` key (as `title$text`);
# it was previously spelled `itle`, so the heading text was not stored under
# the key plotly reads.
my_l <- list(
  title = list(text = '<b> Legend : </b>'),
  orientation = 'v',
  # Legend anchor position.
  y = 0.05,
  x = 0.05,
  # Typeface used for the legend entries.
  font = list(
    family = "sans-serif",
    size = 18,
    color = "#000"
  ),
  # Legend box background and border.
  bgcolor = "#E2E2E2",
  bordercolor = "#FFFFFF",
  borderwidth = 4
)
# Interactive scatter plot of mpg against weight with the fitted regression
# line. Every car is drawn twice at the same (wt, mpg) position: a large grey
# marker whose symbol encodes the transmission type, and a smaller marker on
# top whose colour encodes the number of cylinders.
mpt <- my_mtcars %>%
  # Move the row names (car models) into a regular column named `car`.
  rownames_to_column('car') %>%
  plot_ly() %>%
  # Layer 1: marker symbol by transmission type.
  add_trace(
    type = "scatter",
    x = ~wt,
    y = ~mpg,
    text = ~car,
    legendgroup = 'group1',
    symbol = ~paste0("Transmission type : ", am),
    symbols = c('circle', 'x'),
    mode = 'markers',
    marker = list(color = "grey", size = 25)
  ) %>%
  # Layer 2: marker colour by number of cylinders.
  add_trace(
    type = "scatter",
    x = ~wt,
    y = ~mpg,
    text = ~car,
    legendgroup = 'group2',
    color = ~paste0("N° of cyl : ", cyl),
    mode = 'markers',
    marker = list(size = 19)
  ) %>%
  # Layer 3: regression line built from the predicted values in predDF.
  # The trace type is given explicitly, like the two layers above, instead of
  # leaving plotly to infer it (which emits a "No trace type specified"
  # message).
  add_trace(
    data = predDF,
    type = "scatter",
    x = ~wt,
    y = ~mpg,
    name = 'Regression Fit',
    mode = 'lines',
    alpha = 1
  ) %>%
  layout(legend = my_l)

# Display the figure.
mpt