library(ggplot2)
library(dplyr)
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>     filter, lag
#> The following objects are masked from 'package:base':
#>     intersect, setdiff, setequal, union
###Step-1:Load the libraries
library(ggplot2)
library(dplyr)
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>     filter, lag
#> The following objects are masked from 'package:base':
#>     intersect, setdiff, setequal, union
### Step-2: Load the dataset
# Check files in current folder (the dataset file is expected to be listed).
list.files()
# Console output recorded when this was originally run, kept for reference.
# It is commented out because bare output text is not valid R.
#>  [1] "abcd.qmd" "automobile"
#>  [3] "class 1.docx" "class 1.html"
#>  [5] "class 1.qmd" "class 1_files"
#>  [7] "imports-85.data" "imports-85.names"
#>  [9] "Index" "misc"
#> [11] "Program-12.docx" "Program-12.qmd"
#> [13] "program-13.docx" "program-13.qmd"
#> [15] "Program-2_files" "Program 1.docx"
#> [17] "PROGRAM 11.docx" "PROGRAM 11.qmd"
#> [19] "Program 2.docx" "Program 2.html"
#> [21] "Program 2.qmd" "Program 2_files"
#> [23] "Program 3.docx" "Program 4.docx"
#> [25] "Program 4.qmd" "Program 5.docx"
#> [27] "Program 5.qmd" "program 6.docx"
#> [29] "program 6.qmd" "Program 7.docx"
#> [31] "Program 7.qmd" "Program 9.docx"
#> [33] "Program 9.qmd" "Programs"
#> [35] "rsconnect" "TeamCore(Khyathi,Nainitha).qmd"
#> [37] "TeamCore.docx" "TeamCore.html"
#> [39] "TeamCore.qmd" "TeamCore.rmarkdown"
#> [41] "TeamCore_files"
# Set file name correctly (resolved relative to the current working directory)
file_path <- "imports-85.data"
# Assign column names: the raw file is read without a header row, so the 26
# columns are named here, in file order.
col_names <- c(
  "symboling", "normalized_losses", "make", "fuel_type", "aspiration",
  "num_doors", "body_style", "drive_wheels", "engine_location",
  "wheel_base", "length", "width", "height", "curb_weight",
  "engine_type", "num_cylinders", "engine_size", "fuel_system",
  "bore", "stroke", "compression_ratio", "horsepower", "peak_rpm",
  "city_mpg", "highway_mpg", "price"
)
# Read dataset; "?" entries in the raw file are read as NA
auto_data <- read.csv(
  file_path,
  header = FALSE,
  na.strings = "?",
  col.names = col_names
)
# Convert variables used on the plot axes to numeric
auto_data$engine_size <- as.numeric(auto_data$engine_size)
auto_data$price <- as.numeric(auto_data$price)
# Remove missing values, but only for the variables the plots actually use.
# na.omit() on the whole data frame would also discard rows whose only
# missing values sit in columns this analysis never touches.
plot_vars <- c("engine_size", "price", "fuel_type")
auto_data <- auto_data[complete.cases(auto_data[, plot_vars]), ]
### Step-3: Scatter plot: Engine Size VS Price
# Scatter of price against engine size; points are coloured by fuel type and
# the legend is placed above the panel.
ggplot(
  data = auto_data,
  mapping = aes(x = engine_size, y = price, color = fuel_type)
) +
  geom_point(alpha = 0.7, size = 3) +
  labs(
    x = "Engine Size",
    y = "Car Price",
    color = "Fuel Type",
    title = "Engine Size vs Car Price",
    subtitle = "Relationship between engine size and price",
    caption = "Source: Automobile Dataset"
  ) +
  theme_minimal() +
  theme(legend.position = "top")
### Step-4: Regression Line Visualization
# Scatter of price against engine size with a fitted linear-model trend and
# its confidence band (se = TRUE).
ggplot(auto_data, aes(x = engine_size, y = price, color = fuel_type)) +
  geom_point(alpha = 0.6) +
  # The formula is spelled out so ggplot2 no longer prints the
  # "`geom_smooth()` using formula = 'y ~ x'" message on every render.
  # NOTE(review): color is a constant here rather than mapped to fuel_type,
  # which should give one overall red line - confirm in the rendered plot.
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE, color = "red") +
  labs(
    title = "Regression Analysis: Engine Size vs Price",
    subtitle = "Linear model showing upward trend",
    x = "Engine Size",
    y = "Car Price",
    color = "Fuel Type",
    caption = "Regression line indicates strong positive correlation"
  ) +
  theme_light() +
  theme(legend.position = "top")
### Step-5: Histogram of prices
# Price distribution in 30 bins; bars are filled by fuel type and outlined in
# black.
ggplot(data = auto_data, mapping = aes(x = price, fill = fuel_type)) +
  geom_histogram(alpha = 0.8, color = "black", bins = 30) +
  labs(
    x = "Car Price",
    y = "Frequency",
    fill = "Fuel Type",
    title = "Distribution of Car Prices",
    subtitle = "Frequency distribution of prices",
    caption = "Most cars fall within mid-range prices"
  ) +
  theme_minimal() +
  theme(legend.position = "top")
### Step-6: Faceted Plot by Fuel Type
# Engine size vs price, drawn as one panel per fuel type via facet_wrap().
ggplot(auto_data, aes(x = engine_size, y = price, color = fuel_type)) +
  geom_point(size = 2.5) +
  facet_wrap(~ fuel_type) +
  labs(
    title = "Engine Size vs Price by Fuel Type",
    subtitle = "Comparison across fuel categories",
    x = "Engine Size",
    y = "Car Price",
    color = "Fuel Type",
    caption = "Faceted visualization for better comparison"
  ) +
  theme_bw() +
  theme(legend.position = "top")

# Summary (kept as comments: narrative text is not valid R code)
#
# This program performs a structured analysis of the Automobile dataset by
# cleaning the data and then visualizing key relationships using ggplot2. It
# shows that engine size has a strong positive correlation with car price,
# confirmed through scatter plots and regression lines, while histograms
# reveal that most cars are priced in the mid-range. Finally, faceted plots by
# fuel type highlight differences in pricing trends between gasoline and
# diesel vehicles, making the overall interpretation clear: engine size is a
# major driver of car price, fuel type influences the variation, and the
# dataset is dominated by mid-priced cars.