# Load the built-in mtcars data set and preview its first six rows.
data("mtcars")
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Simple linear regression of car weight (wt) on horsepower (hp); the
# summary prints the coefficient table and overall fit statistics.
model <- lm(formula = wt ~ hp, data = mtcars)
summary(model)
##
## Call:
## lm(formula = wt ~ hp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.41757 -0.53122 -0.02038 0.42536 1.56455
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.838247 0.316520 5.808 2.39e-06 ***
## hp 0.009401 0.001960 4.796 4.15e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7483 on 30 degrees of freedom
## Multiple R-squared: 0.4339, Adjusted R-squared: 0.4151
## F-statistic: 23 on 1 and 30 DF, p-value: 4.146e-05
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
# Scatter plot of weight against horsepower. The plot object is stored in
# `p1` and then evaluated on its own line so the figure is actually drawn:
# assigning a ggplot to a variable does not render it.
p1 <- ggplot(mtcars, aes(x = hp, y = wt)) +
  geom_point(color = "red") +
  labs(
    title = "Horsepower vs Weight",
    x = "Horsepower",
    y = "Weight"
  ) +
  theme_minimal()
p1
# Weight against horsepower, overlaid with a fitted linear-model line and
# no confidence band (se = FALSE).
ggplot(data = mtcars, mapping = aes(x = hp, y = wt)) +
  geom_point(colour = "red") +
  geom_smooth(method = "lm", se = FALSE, colour = "blue") +
  labs(
    title = "Linear Regression of Horsepower and Weight",
    x = "Horsepower",
    y = "Weight"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Residuals vs Fitted plot
# The diagnostic data frame is built explicitly from the fitted model rather
# than handing the lm object itself to ggplot(), so the plot does not depend
# on ggplot2 converting model objects to data frames behind the scenes.
ggplot(
  data.frame(.fitted = fitted(model), .resid = resid(model)),
  aes(.fitted, .resid)
) +
  geom_point(color = "red") +
  theme_minimal() +
  # Reference line at zero: residuals should scatter evenly around it.
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(
    title = "Residuals vs Fitted ",
    x = "Fitted ",
    y = "Residuals"
  )
# Fit mpg on horsepower and weight, and store the fitted values on mtcars so
# the regression surface can be drawn through them.
model3d <- lm(mpg ~ hp + wt, data = mtcars)
mtcars$predicted <- predict(model3d)
# The first trace is the observed data as 3D markers. The mesh trace is added
# with inherit = FALSE so it does not pick up `mode` and `marker` from
# plot_ly(); those attributes are not valid for mesh3d traces and would
# trigger a warning. Its x/y/z are therefore given explicitly.
p <- plot_ly(
  mtcars,
  x = ~hp, y = ~wt, z = ~mpg,
  type = "scatter3d", mode = "markers",
  marker = list(color = "#8C1D40")
) %>%
  add_mesh(
    x = ~hp, y = ~wt, z = ~predicted,
    opacity = 0.5,
    inherit = FALSE
  ) %>%
  layout(
    title = "3D Scatter Plot with Regression Plane",
    scene = list(
      xaxis = list(title = "Horsepower"),
      yaxis = list(title = "Weight"),
      zaxis = list(title = "MPG")
    )
  )
p
## Warning: 'mesh3d' objects don't have these attributes: 'mode', 'marker'
## Valid attributes include:
## 'alphahull', 'autocolorscale', 'cauto', 'cmax', 'cmid', 'cmin', 'color', 'coloraxis', 'colorbar', 'colorscale', 'contour', 'customdata', 'customdatasrc', 'delaunayaxis', 'facecolor', 'facecolorsrc', 'flatshading', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'i', 'ids', 'idssrc', 'intensity', 'intensitymode', 'intensitysrc', 'isrc', 'j', 'jsrc', 'k', 'ksrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'lighting', 'lightposition', 'meta', 'metasrc', 'name', 'opacity', 'reversescale', 'scene', 'showlegend', 'showscale', 'stream', 'text', 'textsrc', 'type', 'uid', 'uirevision', 'vertexcolor', 'vertexcolorsrc', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
# Reload mtcars so the data frame is back to its original columns (the
# `predicted` column added for the 3D plot is discarded), then preview it.
data("mtcars")
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Keep the six continuous measurements (columns 1 and 3 to 7 of mtcars),
# selected by name.
mtcars_num <- mtcars[, c("mpg", "disp", "hp", "drat", "wt", "qsec")]
# Pairwise correlations between the selected variables.
cor_mtcars <- cor(x = mtcars_num)
# Print the correlation matrix.
cor_mtcars
## mpg disp hp drat wt qsec
## mpg 1.0000000 -0.8475514 -0.7761684 0.68117191 -0.8676594 0.41868403
## disp -0.8475514 1.0000000 0.7909486 -0.71021393 0.8879799 -0.43369788
## hp -0.7761684 0.7909486 1.0000000 -0.44875912 0.6587479 -0.70822339
## drat 0.6811719 -0.7102139 -0.4487591 1.00000000 -0.7124406 0.09120476
## wt -0.8676594 0.8879799 0.6587479 -0.71244065 1.0000000 -0.17471588
## qsec 0.4186840 -0.4336979 -0.7082234 0.09120476 -0.1747159 1.00000000
# Visualise the correlation matrix as a heatmap, using pheatmap's default
# settings.
library(pheatmap)
pheatmap(mat = cor_mtcars)
# The matrix shows that mpg is strongly positively correlated with drat
# (r = 0.68) and strongly negatively correlated with wt (r = -0.87) and
# disp (r = -0.85).
# Display the distribution of mpg across the categorical variables.
library(ggplot2)
# One scatter plot of mpg per categorical variable. Each plot puts the
# variable on the x axis and colours the points by its factor levels.

# Number of cylinders
p1 <- ggplot(
  data = mtcars,
  mapping = aes(x = cyl, y = mpg, colour = factor(cyl))
) +
  geom_point() +
  theme_bw()

# Engine shape (vs)
p2 <- ggplot(
  data = mtcars,
  mapping = aes(x = vs, y = mpg, colour = factor(vs))
) +
  geom_point() +
  theme_bw()

# Transmission type (am)
p3 <- ggplot(
  data = mtcars,
  mapping = aes(x = am, y = mpg, colour = factor(am))
) +
  geom_point() +
  theme_bw()

# Number of forward gears
p4 <- ggplot(
  data = mtcars,
  mapping = aes(x = gear, y = mpg, colour = factor(gear))
) +
  geom_point() +
  theme_bw()

# Number of carburetors
p5 <- ggplot(
  data = mtcars,
  mapping = aes(x = carb, y = mpg, colour = factor(carb))
) +
  geom_point() +
  theme_bw()
# Arrange the five per-variable plots in a single grid figure.
library(cowplot)
plot_grid(plotlist = list(p1, p2, p3, p4, p5))
# The plots show that mpg differs across the levels of each categorical
# variable.
# Build a multiple linear regression model.
# Multiple linear regression of mpg on drat, wt, disp, cyl, vs, am, gear and
# carb, with every predictor entering the formula as it is stored in mtcars.
model <- lm(
  mpg ~ drat + wt + disp + cyl + vs + am + gear + carb,
  data = mtcars
)
# In-sample predictions: the model is evaluated on the rows it was fitted to.
predict_mtcars <- predict(model, newdata = mtcars)
# Compare predictions with actual values
plot_data <- data.frame(predict_mpg = predict_mtcars, true_mpg = mtcars$mpg)
# The y aesthetic refers to the `predict_mpg` column of plot_data. Naming the
# global vector `predict_mtcars` here would only work because ggplot2 falls
# back to the calling environment, and would break if that vector changed.
# The dashed line is y = x, i.e. where perfect predictions would fall.
ggplot(plot_data, aes(true_mpg, predict_mpg)) +
  geom_point(color = "purple") +
  geom_abline(
    slope = 1, intercept = 0,
    linetype = "dashed", color = "darkred"
  ) +
  theme_bw()