Automobile

Author

TeamCore(Khyathi,Nainitha)

### Step 1: Load the libraries

library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

### Step 2: Load the dataset

# List what is in the current working directory
list.files(path = ".")
 [1] "abcd.qmd"                       "automobile"                    
 [3] "class 1.docx"                   "class 1.html"                  
 [5] "class 1.qmd"                    "class 1_files"                 
 [7] "imports-85.data"                "imports-85.names"              
 [9] "Index"                          "misc"                          
[11] "Program-12.docx"                "Program-12.qmd"                
[13] "program-13.docx"                "program-13.qmd"                
[15] "Program-2_files"                "Program 1.docx"                
[17] "PROGRAM 11.docx"                "PROGRAM 11.qmd"                
[19] "Program 2.docx"                 "Program 2.html"                
[21] "Program 2.qmd"                  "Program 2_files"               
[23] "Program 3.docx"                 "Program 4.docx"                
[25] "Program 4.qmd"                  "Program 5.docx"                
[27] "Program 5.qmd"                  "program 6.docx"                
[29] "program 6.qmd"                  "Program 7.docx"                
[31] "Program 7.qmd"                  "Program 9.docx"                
[33] "Program 9.qmd"                  "Programs"                      
[35] "rsconnect"                      "TeamCore(Khyathi,Nainitha).qmd"
[37] "TeamCore.docx"                  "TeamCore.html"                 
[39] "TeamCore.qmd"                   "TeamCore.rmarkdown"            
[41] "TeamCore_files"                
# Path to the raw data file (comma-separated, read without a header row)
file_path <- "imports-85.data"

# Column names, in file order; they are supplied here because the file is
# read with header = FALSE
col_names <- c("symboling","normalized_losses","make","fuel_type","aspiration",
               "num_doors","body_style","drive_wheels","engine_location",
               "wheel_base","length","width","height","curb_weight",
               "engine_type","num_cylinders","engine_size","fuel_system",
               "bore","stroke","compression_ratio","horsepower","peak_rpm",
               "city_mpg","highway_mpg","price")

# Read dataset; "?" entries are treated as missing values (NA)
auto_data <- read.csv(file_path,
                      header = FALSE,
                      na.strings = "?",
                      col.names = col_names)

# Make sure the two plotted measures are numeric
auto_data$engine_size <- as.numeric(auto_data$engine_size)
auto_data$price <- as.numeric(auto_data$price)

# Remove only the rows that are missing a variable used in the plots below.
# A blanket na.omit() over all 26 columns would also throw away every car
# with a missing value in an unrelated column, shrinking (and potentially
# biasing) the sample for no benefit.
plot_vars <- c("engine_size", "price", "fuel_type")
auto_data <- auto_data[complete.cases(auto_data[, plot_vars]), ]

### Step 3: Scatter Plot: Engine Size vs Price

# Scatter plot of price against engine size, one colour per fuel type
scatter_labels <- labs(
  title = "Engine Size vs Car Price",
  subtitle = "Relationship between engine size and price",
  x = "Engine Size",
  y = "Car Price",
  color = "Fuel Type",
  caption = "Source: Automobile Dataset"
)

scatter_plot <- ggplot(
  auto_data,
  aes(engine_size, price, colour = fuel_type)
) +
  geom_point(size = 3, alpha = 0.7) +
  scatter_labels +
  theme_minimal() +
  theme(legend.position = "top")

print(scatter_plot)

### Step 4: Regression Line Visualization

# Scatter of price against engine size with a linear-model smoother on top
regression_labels <- labs(
  title = "Regression Analysis: Engine Size vs Price",
  subtitle = "Linear model showing upward trend",
  x = "Engine Size",
  y = "Car Price",
  color = "Fuel Type",
  caption = "Regression line indicates strong positive correlation"
)

regression_plot <- ggplot(
  auto_data,
  aes(engine_size, price, colour = fuel_type)
) +
  geom_point(alpha = 0.6) +
  # The smoother is drawn in a fixed red instead of the fuel-type colours
  geom_smooth(method = "lm", se = TRUE, colour = "red") +
  regression_labels +
  theme_light() +
  theme(legend.position = "top")

print(regression_plot)
`geom_smooth()` using formula = 'y ~ x'

### Step 5: Histogram of Prices

# Histogram of car prices (30 bins), bars filled by fuel type
histogram_labels <- labs(
  title = "Distribution of Car Prices",
  subtitle = "Frequency distribution of prices",
  x = "Car Price",
  y = "Frequency",
  fill = "Fuel Type",
  caption = "Most cars fall within mid-range prices"
)

price_histogram <- ggplot(auto_data, aes(price, fill = fuel_type)) +
  geom_histogram(bins = 30, colour = "black", alpha = 0.8) +
  histogram_labels +
  theme_minimal() +
  theme(legend.position = "top")

print(price_histogram)

### Step 6: Faceted Plot by Fuel Type

# Price against engine size, drawn in a separate panel for each fuel type
facet_labels <- labs(
  title = "Engine Size vs Price by Fuel Type",
  subtitle = "Comparison across fuel categories",
  x = "Engine Size",
  y = "Car Price",
  color = "Fuel Type",
  caption = "Faceted visualization for better comparison"
)

faceted_plot <- ggplot(
  auto_data,
  aes(engine_size, price, colour = fuel_type)
) +
  geom_point(size = 2.5) +
  facet_wrap(vars(fuel_type)) +
  facet_labels +
  theme_bw() +
  theme(legend.position = "top")

print(faceted_plot)

Interpretation

This program performs a structured analysis of the Automobile dataset: it first cleans the data and then visualizes key relationships using ggplot2. The scatter plot and the regression line show that engine size has a strong positive correlation with car price. The histogram reveals that most cars are priced in the mid-range. Finally, the plot faceted by fuel type highlights differences in pricing trends between gasoline and diesel vehicles. Overall, engine size is a major driver of car price, fuel type influences the variation, and the dataset is dominated by mid-priced cars.