library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Load the built-in mtcars dataset into `data`, then print its structure
# (column names, types and the first few values of each column).
data <- datasets::mtcars
str(object = data)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# List the variables (column names) in the dataset.
names(data)
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
# Preview the first 15 rows of the dataset.
head(data, n = 15L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
#' Convert fuel economy from miles per gallon to kilometres per litre.
#'
#' @param mpg Numeric vector of fuel-economy values in miles per gallon.
#' @return Numeric vector of the same length as `mpg`, each element
#'   multiplied by 0.425144.
convert_mpg_to_kmpl <- function(mpg) {
  # Multiplicative factor applied to every element (vectorized).
  kmpl_per_mpg <- 0.425144
  mpg * kmpl_per_mpg
}
# Add a kilometres-per-litre column derived from mpg.
# convert_mpg_to_kmpl() is a plain arithmetic expression and therefore already
# vectorized, so it is applied to the whole column at once instead of going
# element-by-element through sapply() (whose return type depends on its input).
data$kmpl <- convert_mpg_to_kmpl(data$mpg)
# Keep only the rows whose mpg is strictly greater than 20 and display them.
filtered_data <- data %>%
  filter(mpg > 20)
print(filtered_data)
## mpg cyl disp hp drat wt qsec vs am gear carb kmpl
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 8.928024
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 8.928024
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 9.693283
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 9.098082
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 10.373514
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 9.693283
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 13.774666
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 12.924378
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 14.412382
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 9.140596
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 11.606431
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 11.053744
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 12.924378
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 9.098082
# Split the dataset into the response (mpg) and every remaining column, then
# recombine them into one data frame whose predictor columns carry an
# "Independent." prefix.
# NOTE(review): at this point `data` also holds `kmpl`, which was computed
# from mpg, so it ends up among the "independent" columns -- confirm that is
# intended before using this frame for modelling.
dependent_var <- data[["mpg"]]
independent_vars <- select(data, -mpg)
reshaped_data <- data.frame(MPG = dependent_var, Independent = independent_vars)
reshaped_data %>%
  head() %>%
  print()
## MPG Independent.cyl Independent.disp Independent.hp
## Mazda RX4 21.0 6 160 110
## Mazda RX4 Wag 21.0 6 160 110
## Datsun 710 22.8 4 108 93
## Hornet 4 Drive 21.4 6 258 110
## Hornet Sportabout 18.7 8 360 175
## Valiant 18.1 6 225 105
## Independent.drat Independent.wt Independent.qsec
## Mazda RX4 3.90 2.620 16.46
## Mazda RX4 Wag 3.90 2.875 17.02
## Datsun 710 3.85 2.320 18.61
## Hornet 4 Drive 3.08 3.215 19.44
## Hornet Sportabout 3.15 3.440 17.02
## Valiant 2.76 3.460 20.22
## Independent.vs Independent.am Independent.gear
## Mazda RX4 0 1 4
## Mazda RX4 Wag 0 1 4
## Datsun 710 1 1 4
## Hornet 4 Drive 1 0 3
## Hornet Sportabout 0 0 3
## Valiant 1 0 3
## Independent.carb Independent.kmpl
## Mazda RX4 4 8.928024
## Mazda RX4 Wag 4 8.928024
## Datsun 710 1 9.693283
## Hornet 4 Drive 1 9.098082
## Hornet Sportabout 2 7.950193
## Valiant 1 7.695106
# Drop rows that contain missing values, then remove exact duplicate rows.
cleaned_data <- data %>%
  na.omit()
data_no_duplicates <- cleaned_data %>%
  distinct()
# Order the cars from highest to lowest mpg.
# NOTE(review): the sort is applied to `data`, not to `data_no_duplicates` --
# confirm that bypassing the cleaned frame is intended.
sorted_data <- data[order(-data[["mpg"]]), ]
# Give the mpg and hp columns more descriptive names (a no-op for a column
# that is not present).
names(data)[names(data) == "mpg"] <- "Miles_Per_Gallon"
names(data)[names(data) == "hp"] <- "Horse_Power"
# Derived column: the wt column multiplied by two.
data[["Double_Weight"]] <- 2 * data[["wt"]]
# Reproducible 80/20 train/test split of the rows of `data`.
set.seed(123)
# 0.8 * nrow(data) is generally not a whole number; floor() makes the intended
# rounding explicit instead of relying on sample()'s implicit coercion of a
# fractional `size`.
train_index <- sample(seq_len(nrow(data)), size = floor(0.8 * nrow(data)))
train_data <- data[train_index, ]
test_data <- data[-train_index, ]
# Per-column summary statistics of the full dataset.
summary(data)
## Miles_Per_Gallon cyl disp Horse_Power
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb kmpl
## Min. :0.0000 Min. :3.000 Min. :1.000 Min. : 4.421
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.: 6.558
## Median :0.0000 Median :4.000 Median :2.000 Median : 8.163
## Mean :0.4062 Mean :3.688 Mean :2.812 Mean : 8.541
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.: 9.693
## Max. :1.0000 Max. :5.000 Max. :8.000 Max. :14.412
## Double_Weight
## Min. : 3.026
## 1st Qu.: 5.162
## Median : 6.650
## Mean : 6.434
## 3rd Qu.: 7.220
## Max. :10.848
# Central-tendency and spread statistics for fuel economy.
mean_mpg <- mean(data$Miles_Per_Gallon)
median_mpg <- median(data$Miles_Per_Gallon)
# Mode: the most frequent value. table() lists values in increasing order and
# which.max() returns the first maximum, so when several values tie for the
# highest count only the smallest of them is reported.
mode_mpg <- as.numeric(names(which.max(table(data$Miles_Per_Gallon))))
# range() returns a length-2 vector: c(minimum, maximum).
range_mpg <- range(data$Miles_Per_Gallon)
print(paste("Mean MPG:", mean_mpg))
## [1] "Mean MPG: 20.090625"
print(paste("Median MPG:", median_mpg))
## [1] "Median MPG: 19.2"
print(paste("Mode MPG:", mode_mpg))
## [1] "Mode MPG: 10.4"
# Collapse the two range endpoints into one string; pasting the length-2
# vector directly would recycle the label and print two separate strings.
print(paste("Range MPG:", paste(range_mpg, collapse = " - ")))
## [1] "Range MPG: 10.4 - 33.9"
# Scatter plot of fuel economy against horsepower, one blue point per car.
ggplot(data, aes(x = Horse_Power, y = Miles_Per_Gallon)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot: Horse Power vs Miles Per Gallon",
    x = "Horse Power",
    y = "Miles Per Gallon"
  )
# Bar plot of fuel economy by number of cylinders.
# The data has one row per car, so stat = "identity" would stack every car's
# value and each bar would show the *sum* of MPG in that cylinder group.
# Summarising with the mean makes the bar height match the y-axis label.
ggplot(data, aes(x = as.factor(cyl), y = Miles_Per_Gallon, fill = as.factor(cyl))) +
  geom_bar(stat = "summary", fun = "mean") +
  ggtitle("Bar Plot: MPG by Cylinders") +
  xlab("Number of Cylinders") +
  ylab("Mean Miles Per Gallon") +
  scale_fill_discrete(name = "Cylinders")
# Correlation (cor() default method) between fuel economy and horsepower.
correlation <- with(data, cor(Miles_Per_Gallon, Horse_Power))
print(paste("Correlation between MPG and HP:", correlation))
## [1] "Correlation between MPG and HP: -0.776168371826586"
# Simple linear regression of fuel economy on horsepower.
# NOTE(review): the model is fitted on all of `data`; the train_data /
# test_data split created earlier is not used here -- confirm that is intended.
linear_model <- lm(formula = Miles_Per_Gallon ~ Horse_Power, data = data)
summary(linear_model)
##
## Call:
## lm(formula = Miles_Per_Gallon ~ Horse_Power, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7121 -2.1122 -0.8854 1.5819 8.2360
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.09886 1.63392 18.421 < 2e-16 ***
## Horse_Power -0.06823 0.01012 -6.742 1.79e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.863 on 30 degrees of freedom
## Multiple R-squared: 0.6024, Adjusted R-squared: 0.5892
## F-statistic: 45.46 on 1 and 30 DF, p-value: 1.788e-07