library(ISLR2)
library(ggplot2)
# Load the Auto data set that ships with ISLR2 into the workspace and
# print a compact overview of its columns.
data("Auto", package = "ISLR2")
str(Auto)
## 'data.frame': 392 obs. of 9 variables:
## $ mpg : num 18 15 18 16 17 15 14 14 14 15 ...
## $ cylinders : int 8 8 8 8 8 8 8 8 8 8 ...
## $ displacement: num 307 350 318 304 302 429 454 440 455 390 ...
## $ horsepower : int 130 165 150 150 140 198 220 215 225 190 ...
## $ weight : int 3504 3693 3436 3433 3449 4341 4354 4312 4425 3850 ...
## $ acceleration: num 12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
## $ year : int 70 70 70 70 70 70 70 70 70 70 ...
## $ origin : int 1 1 1 1 1 1 1 1 1 1 ...
## $ name : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
## - attr(*, "na.action")= 'omit' Named int [1:5] 33 127 331 337 355
## ..- attr(*, "names")= chr [1:5] "33" "127" "331" "337" ...
# Column names of Auto, split by how this analysis treats them:
# quantitative columns get numeric summaries below, qualitative ones do not.
quantitative_vars <- c(
  "mpg", "displacement", "horsepower", "weight",
  "acceleration", "year", "cylinders"
)
qualitative_vars <- c("origin", "name")

quantitative_vars
## [1] "mpg" "displacement" "horsepower" "weight" "acceleration"
## [6] "year" "cylinders"
qualitative_vars
## [1] "origin" "name"
# Range of every quantitative column: row 1 is the minimum, row 2 the
# maximum. vapply() is used instead of sapply() so the result is always a
# 2 x k numeric matrix; a column that does not yield exactly two numbers
# raises an error instead of silently changing the shape of the output.
vapply(Auto[quantitative_vars], range, numeric(2))
## mpg displacement horsepower weight acceleration year cylinders
## [1,] 9.0 68 46 1613 8.0 70 3
## [2,] 46.6 455 230 5140 24.8 82 8
# Mean and standard deviation of every quantitative column, one column of
# the result per variable. vapply() (rather than sapply()) guarantees a
# 2 x k numeric matrix; the row names "mean" and "sd" come from the names
# of the vector returned by the summary function.
data_summary <- vapply(
  Auto[quantitative_vars],
  function(x) c(mean = mean(x), sd = sd(x)),
  numeric(2)
)
data_summary
## mpg displacement horsepower weight acceleration year
## mean 23.445918 194.412 104.46939 2977.5842 15.541327 75.979592
## sd 7.805007 104.644 38.49116 849.4026 2.758864 3.683737
## cylinders
## mean 5.471939
## sd 1.705783
# Drop the 10th through 85th observations. The negative index removes rows
# by position, not by row name; the two differ here because the row names
# of Auto have gaps where incomplete records were omitted.
Auto_subset <- Auto[-(10:85), ]

# Range, mean and standard deviation of every quantitative column in the
# reduced data. Naming the two-element range() result "range" makes c()
# label the rows "range1" (minimum) and "range2" (maximum). vapply()
# (rather than sapply()) guarantees a 4 x k numeric matrix.
subset_summary <- vapply(
  Auto_subset[quantitative_vars],
  function(x) c(range = range(x), mean = mean(x), sd = sd(x)),
  numeric(4)
)
subset_summary
## mpg displacement horsepower weight acceleration year
## range1 11.000000 68.00000 46.00000 1649.0000 8.500000 70.000000
## range2 46.600000 455.00000 230.00000 4997.0000 24.800000 82.000000
## mean 24.404430 187.24051 100.72152 2935.9715 15.726899 77.145570
## sd 7.867283 99.67837 35.70885 811.3002 2.693721 3.106217
## cylinders
## range1 3.000000
## range2 8.000000
## mean 5.373418
## sd 1.654179
# Scatterplots of mpg against three candidate predictors, each overlaid
# with a least-squares line from geom_smooth(method = "lm").
plot1 <- ggplot(Auto, aes(x = weight, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "MPG vs Weight")

plot2 <- ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "MPG vs Horsepower")

plot3 <- ggplot(Auto, aes(x = displacement, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "MPG vs Displacement")

# Printing a plot object renders it; each render reports the smoothing
# formula that geom_smooth() chose by default.
plot1
## `geom_smooth()` using formula = 'y ~ x'
plot2
## `geom_smooth()` using formula = 'y ~ x'
plot3
## `geom_smooth()` using formula = 'y ~ x'
The scatterplots above show that mpg decreases as weight, horsepower, and displacement increase; the fitted lines slope downward in all three plots. Heavier cars with larger, more powerful engines tend to have lower fuel economy, which suggests that these variables may be useful predictors of mpg. A regression analysis would be needed to confirm and quantify these relationships.
Taken together, the summary statistics and scatterplots give a first picture of the key relationships within the Auto dataset.