library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(DescTools)
help(mtcars)
mtcars
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
summary(mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
ncol(mtcars)
## [1] 11
nrow(mtcars)
## [1] 32
ggplot(mtcars, aes(x=wt, y=mpg)) +
geom_point()
is.na(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear
## Mazda RX4 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Mazda RX4 Wag FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Datsun 710 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Hornet 4 Drive FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Hornet Sportabout FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Valiant FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Duster 360 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Merc 240D FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Merc 230 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Merc 280 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Merc 280C FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Merc 450SE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Merc 450SL FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Merc 450SLC FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Cadillac Fleetwood FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Lincoln Continental FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Chrysler Imperial FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Fiat 128 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Honda Civic FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Toyota Corolla FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Toyota Corona FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Dodge Challenger FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## AMC Javelin FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Camaro Z28 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Pontiac Firebird FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Fiat X1-9 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Porsche 914-2 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Lotus Europa FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Ford Pantera L FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Ferrari Dino FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Maserati Bora FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Volvo 142E FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## carb
## Mazda RX4 FALSE
## Mazda RX4 Wag FALSE
## Datsun 710 FALSE
## Hornet 4 Drive FALSE
## Hornet Sportabout FALSE
## Valiant FALSE
## Duster 360 FALSE
## Merc 240D FALSE
## Merc 230 FALSE
## Merc 280 FALSE
## Merc 280C FALSE
## Merc 450SE FALSE
## Merc 450SL FALSE
## Merc 450SLC FALSE
## Cadillac Fleetwood FALSE
## Lincoln Continental FALSE
## Chrysler Imperial FALSE
## Fiat 128 FALSE
## Honda Civic FALSE
## Toyota Corolla FALSE
## Toyota Corona FALSE
## Dodge Challenger FALSE
## AMC Javelin FALSE
## Camaro Z28 FALSE
## Pontiac Firebird FALSE
## Fiat X1-9 FALSE
## Porsche 914-2 FALSE
## Lotus Europa FALSE
## Ford Pantera L FALSE
## Ferrari Dino FALSE
## Maserati Bora FALSE
## Volvo 142E FALSE
sum(is.na(mtcars))
## [1] 0
summary(mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
full_model <- lm(mpg ~ ., data = mtcars)
summary(full_model)
##
## Call:
## lm(formula = mpg ~ ., data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4506 -1.6044 -0.1196 1.2193 4.6271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.30337 18.71788 0.657 0.5181
## cyl -0.11144 1.04502 -0.107 0.9161
## disp 0.01334 0.01786 0.747 0.4635
## hp -0.02148 0.02177 -0.987 0.3350
## drat 0.78711 1.63537 0.481 0.6353
## wt -3.71530 1.89441 -1.961 0.0633 .
## qsec 0.82104 0.73084 1.123 0.2739
## vs 0.31776 2.10451 0.151 0.8814
## am 2.52023 2.05665 1.225 0.2340
## gear 0.65541 1.49326 0.439 0.6652
## carb -0.19942 0.82875 -0.241 0.8122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared: 0.869, Adjusted R-squared: 0.8066
## F-statistic: 13.93 on 10 and 21 DF, p-value: 3.793e-07
predicted <- predict(full_model)
mse <- mean((mtcars$mpg - predicted)^2)
summary(mse)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.609 4.609 4.609 4.609 4.609 4.609
interaction <- lm(mpg ~ . + wt*am, data = mtcars)
summary(full_model)
##
## Call:
## lm(formula = mpg ~ ., data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4506 -1.6044 -0.1196 1.2193 4.6271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.30337 18.71788 0.657 0.5181
## cyl -0.11144 1.04502 -0.107 0.9161
## disp 0.01334 0.01786 0.747 0.4635
## hp -0.02148 0.02177 -0.987 0.3350
## drat 0.78711 1.63537 0.481 0.6353
## wt -3.71530 1.89441 -1.961 0.0633 .
## qsec 0.82104 0.73084 1.123 0.2739
## vs 0.31776 2.10451 0.151 0.8814
## am 2.52023 2.05665 1.225 0.2340
## gear 0.65541 1.49326 0.439 0.6652
## carb -0.19942 0.82875 -0.241 0.8122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared: 0.869, Adjusted R-squared: 0.8066
## F-statistic: 13.93 on 10 and 21 DF, p-value: 3.793e-07
summary(interaction)
##
## Call:
## lm(formula = mpg ~ . + wt * am, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0807 -1.4803 -0.4741 1.3226 4.5850
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.070807 17.191096 0.120 0.90532
## cyl 0.225567 0.942201 0.239 0.81323
## disp 0.004384 0.016328 0.269 0.79105
## hp -0.006131 0.020359 -0.301 0.76643
## drat 0.359683 1.469370 0.245 0.80911
## wt -3.030663 1.712406 -1.770 0.09200 .
## qsec 1.109647 0.662235 1.676 0.10938
## vs -0.077414 1.884792 -0.041 0.96764
## am 13.334354 4.663324 2.859 0.00969 **
## gear 1.108383 1.344775 0.824 0.41954
## carb -0.090545 0.740919 -0.122 0.90395
## wt:am -4.137886 1.640318 -2.523 0.02023 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.365 on 20 degrees of freedom
## Multiple R-squared: 0.9006, Adjusted R-squared: 0.846
## F-statistic: 16.48 on 11 and 20 DF, p-value: 1.081e-07
boxplot(mtcars, main="Boxplot for all variables")
winsorize <- mtcars
winsorize$hp <- Winsorize(mtcars$hp, val = quantile(mtcars$hp, probs = c(0.05, 0.95), na.rm = FALSE))
winsorize$wt <- Winsorize(mtcars$wt, val = quantile(mtcars$wt, probs = c(0.05, 0.95), na.rm = FALSE))
winsorize$qsec <- Winsorize(mtcars$qsec, val = quantile(mtcars$qsec, probs = c(0.05, 0.95), na.rm = FALSE))
winsorize$carb <- Winsorize(mtcars$carb, val = quantile(mtcars$carb, probs = c(0.05, 0.95), na.rm = FALSE))
summary(winsorize$hp)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 63.65 96.50 123.00 144.23 180.00 253.55
summary(winsorize$wt)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.736 2.581 3.325 3.222 3.610 5.293
summary(winsorize$qsec)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.05 16.89 17.71 17.79 18.90 20.10
summary(winsorize$carb)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 2.000 2.681 4.000 4.900
win_c <- winsorize [ , c("hp", "wt", "qsec", "carb")]
summary(win_c)
## hp wt qsec carb
## Min. : 63.65 Min. :1.736 Min. :15.05 Min. :1.000
## 1st Qu.: 96.50 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:2.000
## Median :123.00 Median :3.325 Median :17.71 Median :2.000
## Mean :144.23 Mean :3.222 Mean :17.79 Mean :2.681
## 3rd Qu.:180.00 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:4.000
## Max. :253.55 Max. :5.293 Max. :20.10 Max. :4.900
wins_model <- lm(mpg ~ ., data = winsorize)
summary(full_model)
##
## Call:
## lm(formula = mpg ~ ., data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4506 -1.6044 -0.1196 1.2193 4.6271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.30337 18.71788 0.657 0.5181
## cyl -0.11144 1.04502 -0.107 0.9161
## disp 0.01334 0.01786 0.747 0.4635
## hp -0.02148 0.02177 -0.987 0.3350
## drat 0.78711 1.63537 0.481 0.6353
## wt -3.71530 1.89441 -1.961 0.0633 .
## qsec 0.82104 0.73084 1.123 0.2739
## vs 0.31776 2.10451 0.151 0.8814
## am 2.52023 2.05665 1.225 0.2340
## gear 0.65541 1.49326 0.439 0.6652
## carb -0.19942 0.82875 -0.241 0.8122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared: 0.869, Adjusted R-squared: 0.8066
## F-statistic: 13.93 on 10 and 21 DF, p-value: 3.793e-07
summary(wins_model)
##
## Call:
## lm(formula = mpg ~ ., data = winsorize)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4747 -1.2929 -0.3913 0.9772 4.2880
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.41946 24.84625 0.339 0.738
## cyl -0.20073 1.01372 -0.198 0.845
## disp 0.01124 0.01864 0.603 0.553
## hp -0.02169 0.02658 -0.816 0.424
## drat 1.43716 1.85184 0.776 0.446
## wt -3.46568 2.50599 -1.383 0.181
## qsec 0.97038 1.25256 0.775 0.447
## vs -0.59474 2.52272 -0.236 0.816
## am 1.58236 1.99461 0.793 0.436
## gear 0.85737 1.44313 0.594 0.559
## carb -0.50681 1.08940 -0.465 0.647
##
## Residual standard error: 2.66 on 21 degrees of freedom
## Multiple R-squared: 0.868, Adjusted R-squared: 0.8052
## F-statistic: 13.81 on 10 and 21 DF, p-value: 4.095e-07