# Load and inspect the data ----

# Load the built-in mtcars data set into the workspace.
data(mtcars)

# Open the help page that describes the data set and its variables.
?mtcars

# Per-variable summary statistics, then the structure (types and dimensions).
summary(mtcars)
str(mtcars)
# ggplot2 visualisations ----
library(ggplot2)

# Scatter plot of mpg against wt with a fitted linear-regression line
# (se = FALSE suppresses the confidence band).
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "MPG vs Weight",
    x = "Weight (1000lbs)",
    y = "Miles per Gallon"
  )

# Box plot of mpg for each value of cyl; factor() makes cyl a discrete
# grouping variable so that one box is drawn per cylinder count.
ggplot(mtcars, aes(x = factor(cyl), y = mpg)) +
  geom_boxplot() +
  labs(
    title = "MPG by Number of Cylinders",
    x = "Cylinders",
    y = "Miles per Gallon"
  )
# Correlations ----

# Pairwise correlation matrix of every column in mtcars.
cor_matrix <- cor(mtcars)
cor_matrix

# Correlation of each variable with mpg, sorted in increasing order.
# Reuses cor_matrix rather than recomputing the full matrix.
sort(cor_matrix[, "mpg"])
# Base-graphics box plots: one box per column of mtcars on a shared axis.
boxplot(mtcars, main = "Boxplots of mtcars Variables")
# Min-max rescaling of hp ----

# Range of the original variable; computed once and reused below.
hp_min <- min(mtcars$hp)
hp_max <- max(mtcars$hp)
hp_min
hp_max

# Rescale hp to the interval [0, 1]: (x - min) / (max - min).
mtcars$hp_rs <- (mtcars$hp - hp_min) / (hp_max - hp_min)

# Check: the rescaled variable should have minimum 0 and maximum 1.
min(mtcars$hp_rs)
max(mtcars$hp_rs)
# Winsorizing wt ----

# Cut-offs: the 5th and 95th percentiles of wt.
lower <- quantile(mtcars$wt, probs = 0.05)
upper <- quantile(mtcars$wt, probs = 0.95)
lower
upper

# Clamp wt to [lower, upper]: pmax() raises values below the lower cut-off,
# pmin() lowers values above the upper cut-off; everything else is unchanged.
mtcars$wt_win <- pmin(pmax(mtcars$wt, lower), upper)

# Check: the extremes of wt_win should equal the two cut-offs.
min(mtcars$wt_win)
max(mtcars$wt_win)
# I winsorized the variable wt at the 5th and 95th percentiles: values below
# the 5th percentile were replaced with the 5th percentile value, and values
# above the 95th percentile were replaced with the 95th percentile value. The
# result is stored as the new variable wt_win in the mtcars dataset. The
# minimum of wt_win equals the 5th percentile value and the maximum equals the
# 95th percentile value, confirming that the extreme values were capped.