# Load the mtcars dataset into the workspace and preview its first six rows.
data("mtcars")
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Simple linear regression of weight (wt) on horsepower (hp), followed by
# the coefficient table and fit statistics.
model <- lm(formula = wt ~ hp, data = mtcars)
summary(model)
## 
## Call:
## lm(formula = wt ~ hp, data = mtcars)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.41757 -0.53122 -0.02038  0.42536  1.56455 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.838247   0.316520   5.808 2.39e-06 ***
## hp          0.009401   0.001960   4.796 4.15e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7483 on 30 degrees of freedom
## Multiple R-squared:  0.4339, Adjusted R-squared:  0.4151 
## F-statistic:    23 on 1 and 30 DF,  p-value: 4.146e-05
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
# Scatter plot of weight (wt) against horsepower (hp).
# The plot is stored in p1 and then printed explicitly: an assignment alone
# builds the ggplot object without rendering it.
p1 <- ggplot(mtcars, aes(x = hp, y = wt)) +
  geom_point(color = "red") +
  labs(title = "Horsepower vs Weight",
       x = "Horsepower",
       y = "Weight") +
  theme_minimal()
p1
# Scatter plot of wt against hp with the fitted least-squares line on top;
# se = FALSE leaves out the confidence band around the line.
ggplot(data = mtcars, mapping = aes(x = hp, y = wt)) +
  geom_point(colour = "red") +
  geom_smooth(method = "lm", colour = "blue", se = FALSE) +
  labs(
    title = "Linear Regression of Horsepower and Weight",
    x = "Horsepower",
    y = "Weight"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Residuals vs Fitted plot

# Diagnostic plot: residuals of `model` against its fitted values.
# ggplot() converts the lm object into a data frame that provides the
# .fitted and .resid columns; the dashed line marks a residual of zero.
ggplot(data = model, mapping = aes(x = .fitted, y = .resid)) +
  geom_point(colour = "red") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(
    title = "Residuals vs Fitted ",
    x = "Fitted ",
    y = "Residuals"
  ) +
  theme_minimal()

# Fit mpg on horsepower and weight. The fitted values of this two-predictor
# linear model lie on a plane in (hp, wt, mpg) space, which is drawn as a
# semi-transparent mesh behind the observed points.
model3d <- lm(mpg ~ hp + wt, data = mtcars)
mtcars$predicted <- predict(model3d)

# Marker settings are attached to the add_markers() trace instead of
# plot_ly(): attributes given to plot_ly() are inherited by every trace added
# afterwards, and mesh3d traces reject 'mode' and 'marker' with a warning.
p <- plot_ly(mtcars, x = ~hp, y = ~wt, z = ~mpg) %>%
  add_markers(marker = list(color = "#8C1D40")) %>%
  add_mesh(x = ~hp, y = ~wt, z = ~predicted, opacity = 0.5) %>%
  layout(title = "3D Scatter Plot with Regression Plane",
         scene = list(xaxis = list(title = 'Horsepower'),
                      yaxis = list(title = 'Weight'),
                      zaxis = list(title = 'MPG')))
p

Continue working with the mtcars dataset

The question is which variables determine the value of mpg, and how well mpg can be predicted from them

# Reload mtcars so the analysis below starts from the original dataset
# (this discards the `predicted` column added earlier), then preview it.
data("mtcars")
head(x = mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

Examine the relationships between various numerical variables

# Keep only the continuous variables (columns 1 and 3 to 7 of mtcars),
# selected here by name.
mtcars_num <- mtcars[, c("mpg", "disp", "hp", "drat", "wt", "qsec")]

# Pairwise correlations between the selected columns.
cor_mtcars <- cor(x = mtcars_num)

# Print the correlation matrix.
cor_mtcars
##             mpg       disp         hp        drat         wt        qsec
## mpg   1.0000000 -0.8475514 -0.7761684  0.68117191 -0.8676594  0.41868403
## disp -0.8475514  1.0000000  0.7909486 -0.71021393  0.8879799 -0.43369788
## hp   -0.7761684  0.7909486  1.0000000 -0.44875912  0.6587479 -0.70822339
## drat  0.6811719 -0.7102139 -0.4487591  1.00000000 -0.7124406  0.09120476
## wt   -0.8676594  0.8879799  0.6587479 -0.71244065  1.0000000 -0.17471588
## qsec  0.4186840 -0.4336979 -0.7082234  0.09120476 -0.1747159  1.00000000
# Render the correlation matrix as a heatmap.
library(pheatmap)
pheatmap(mat = cor_mtcars)

The matrix shows that mpg is positively correlated with drat (r ≈ 0.68) and strongly negatively correlated with wt (r ≈ -0.87) and disp (r ≈ -0.85); hp is also strongly negatively correlated with mpg (r ≈ -0.78)

Display mpg distribution across different categorical variables

library(ggplot2)

# One scatter plot of mpg per grouping variable (cyl, vs, am, gear, carb).
# Each variable is wrapped in factor() for the colour aesthetic so that the
# points get a discrete colour per level.
p1 <- ggplot(
  data = mtcars,
  mapping = aes(x = cyl, y = mpg, colour = factor(cyl))
) +
  theme_bw() +
  geom_point()
p2 <- ggplot(
  data = mtcars,
  mapping = aes(x = vs, y = mpg, colour = factor(vs))
) +
  theme_bw() +
  geom_point()
p3 <- ggplot(
  data = mtcars,
  mapping = aes(x = am, y = mpg, colour = factor(am))
) +
  theme_bw() +
  geom_point()
p4 <- ggplot(
  data = mtcars,
  mapping = aes(x = gear, y = mpg, colour = factor(gear))
) +
  theme_bw() +
  geom_point()
p5 <- ggplot(
  data = mtcars,
  mapping = aes(x = carb, y = mpg, colour = factor(carb))
) +
  theme_bw() +
  geom_point()

# Arrange the five panels in a single figure.
library(cowplot)
plot_grid(plotlist = list(p1, p2, p3, p4, p5))

It can be observed that mpg differs across the levels of each categorical variable

Build a multiple linear regression model

# Multiple linear regression of mpg on drat, wt, disp, cyl, vs, am, gear
# and carb. Note that this reassigns `model`.
model <- lm(mpg ~ drat + wt + disp + cyl + vs + am + gear + carb, data = mtcars)
# In-sample predictions: the model is evaluated on the rows it was fitted to.
predict_mtcars <- predict(model, mtcars)

# Compare predictions with actual values

plot_data <- data.frame(predict_mpg = predict_mtcars, true_mpg = mtcars$mpg)
# y is mapped to the predict_mpg column of plot_data (not to the standalone
# predict_mtcars vector), so the plot depends only on its data argument and
# the axis label matches the column. Points on the dashed y = x line are
# predicted exactly.
ggplot(plot_data, aes(x = true_mpg, y = predict_mpg)) +
  geom_point(color = 'purple') +
  geom_abline(slope = 1, intercept = 0, linetype = 'dashed', color = 'darkred') +
  theme_bw()