library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'ggplot2' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(readr)
data<-read.csv("C:/MCA/MCA-2SEM/R Programming/PROJECT/car_rental_dataset.csv")
#print(data)
head(data)
## Make Model Price Year Kilometer
## 1 Honda Amaze 1.2 VX i-VTEC 505000 2017 87150
## 2 Maruti Suzuki Swift DZire VDI 450000 2014 75000
## 3 Hyundai i10 Magna 1.2 Kappa2 220000 2011 67000
## 4 Toyota Glanza G 799000 2019 37500
## 5 Toyota Innova 2.4 VX 7 STR [2016-2020] 1950000 2018 69000
## 6 Maruti Suzuki Ciaz ZXi 675000 2017 73315
## Fuel.Type Transmission Location Color Owner Seller.Type Engine
## 1 Petrol Manual Pune Grey First Corporate 1198 cc
## 2 Diesel Manual Ludhiana White Second Individual 1248 cc
## 3 Petrol Manual Lucknow Maroon First Individual 1197 cc
## 4 Petrol Manual Mangalore Red First Individual 1197 cc
## 5 Diesel Manual Mumbai Grey First Individual 2393 cc
## 6 Petrol Manual Pune Grey First Individual 1373 cc
## Max.Power Max.Torque Drivetrain Length Width Height
## 1 87 bhp @ 6000 rpm 109 Nm @ 4500 rpm FWD 3990 1680 1505
## 2 74 bhp @ 4000 rpm 190 Nm @ 2000 rpm FWD 3995 1695 1555
## 3 79 bhp @ 6000 rpm 112.7619 Nm @ 4000 rpm FWD 3585 1595 1550
## 4 82 bhp @ 6000 rpm 113 Nm @ 4200 rpm FWD 3995 1745 1510
## 5 148 bhp @ 3400 rpm 343 Nm @ 1400 rpm RWD 4735 1830 1795
## 6 91 bhp @ 6000 rpm 130 Nm @ 4000 rpm FWD 4490 1730 1485
## Seating.Capacity Fuel.Tank.Capacity
## 1 5 35
## 2 5 42
## 3 5 35
## 4 5 37
## 5 7 55
## 6 5 43
tail(data)
## Make Model Price Year Kilometer
## 2054 Maruti Suzuki Ritz Vxi (ABS) BS-IV 245000 2014 79000
## 2055 Mahindra XUV500 W8 [2015-2017] 850000 2016 90300
## 2056 Hyundai Eon D-Lite + 275000 2014 83000
## 2057 Ford Figo Duratec Petrol ZXI 1.2 240000 2013 73000
## 2058 BMW 5-Series 520d Luxury Line [2017-2019] 4290000 2018 60474
## 2059 Mahindra Bolero Power Plus ZLX [2016-2019] 670000 2017 72000
## Fuel.Type Transmission Location Color Owner Seller.Type Engine
## 2054 Petrol Manual Faridabad White Second Individual 1197 cc
## 2055 Diesel Manual Surat White First Individual 2179 cc
## 2056 Petrol Manual Ahmedabad White Second Individual 814 cc
## 2057 Petrol Manual Thane Silver First Individual 1196 cc
## 2058 Diesel Automatic Coimbatore White First Individual 1995 cc
## 2059 Diesel Manual Guwahati White First Individual 1493 cc
## Max.Power Max.Torque Drivetrain Length Width Height
## 2054 85 bhp @ 6000 rpm 113 Nm @ 4500 rpm FWD 3775 1680 1620
## 2055 138 bhp @ 3750 rpm 330 Nm @ 1600 rpm FWD 4585 1890 1785
## 2056 55 bhp @ 5500 rpm 75 Nm @ 4000 rpm FWD 3495 1550 1500
## 2057 70 bhp @ 6250 rpm 102 Nm @ 4000 rpm FWD 3795 1680 1427
## 2058 188 bhp @ 4000 rpm 400 Nm @ 1750 rpm RWD 4936 1868 1479
## 2059 70 bhp @ 3600 rpm 195 Nm @ 1400 rpm RWD 3995 1745 1880
## Seating.Capacity Fuel.Tank.Capacity
## 2054 5 43
## 2055 7 70
## 2056 5 32
## 2057 5 45
## 2058 5 65
## 2059 7 NA
str(data)
## 'data.frame': 2059 obs. of 20 variables:
## $ Make : chr "Honda" "Maruti Suzuki" "Hyundai" "Toyota" ...
## $ Model : chr "Amaze 1.2 VX i-VTEC" "Swift DZire VDI" "i10 Magna 1.2 Kappa2" "Glanza G" ...
## $ Price : int 505000 450000 220000 799000 1950000 675000 1898999 2650000 1390000 575000 ...
## $ Year : int 2017 2014 2011 2019 2018 2017 2015 2017 2017 2015 ...
## $ Kilometer : int 87150 75000 67000 37500 69000 73315 47000 75000 56000 85000 ...
## $ Fuel.Type : chr "Petrol" "Diesel" "Petrol" "Petrol" ...
## $ Transmission : chr "Manual" "Manual" "Manual" "Manual" ...
## $ Location : chr "Pune" "Ludhiana" "Lucknow" "Mangalore" ...
## $ Color : chr "Grey" "White" "Maroon" "Red" ...
## $ Owner : chr "First" "Second" "First" "First" ...
## $ Seller.Type : chr "Corporate" "Individual" "Individual" "Individual" ...
## $ Engine : chr "1198 cc" "1248 cc" "1197 cc" "1197 cc" ...
## $ Max.Power : chr "87 bhp @ 6000 rpm" "74 bhp @ 4000 rpm" "79 bhp @ 6000 rpm" "82 bhp @ 6000 rpm" ...
## $ Max.Torque : chr "109 Nm @ 4500 rpm" "190 Nm @ 2000 rpm" "112.7619 Nm @ 4000 rpm" "113 Nm @ 4200 rpm" ...
## $ Drivetrain : chr "FWD" "FWD" "FWD" "FWD" ...
## $ Length : int 3990 3995 3585 3995 4735 4490 4630 4439 4670 4331 ...
## $ Width : int 1680 1695 1595 1745 1830 1730 1777 1821 1814 1822 ...
## $ Height : int 1505 1555 1550 1510 1795 1485 1432 1612 1476 1671 ...
## $ Seating.Capacity : int 5 5 5 5 7 5 5 5 5 5 ...
## $ Fuel.Tank.Capacity: num 35 42 35 37 55 43 NA 51 50 50 ...
dim(data)
## [1] 2059 20
# it return column name
colnames(data)
## [1] "Make" "Model" "Price"
## [4] "Year" "Kilometer" "Fuel.Type"
## [7] "Transmission" "Location" "Color"
## [10] "Owner" "Seller.Type" "Engine"
## [13] "Max.Power" "Max.Torque" "Drivetrain"
## [16] "Length" "Width" "Height"
## [19] "Seating.Capacity" "Fuel.Tank.Capacity"
summary(data)
## Make Model Price Year
## Length:2059 Length:2059 Min. : 49000 Min. :1988
## Class :character Class :character 1st Qu.: 484999 1st Qu.:2014
## Mode :character Mode :character Median : 825000 Median :2017
## Mean : 1702992 Mean :2016
## 3rd Qu.: 1925000 3rd Qu.:2019
## Max. :35000000 Max. :2022
##
## Kilometer Fuel.Type Transmission Location
## Min. : 0 Length:2059 Length:2059 Length:2059
## 1st Qu.: 29000 Class :character Class :character Class :character
## Median : 50000 Mode :character Mode :character Mode :character
## Mean : 54225
## 3rd Qu.: 72000
## Max. :2000000
##
## Color Owner Seller.Type Engine
## Length:2059 Length:2059 Length:2059 Length:2059
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Max.Power Max.Torque Drivetrain Length
## Length:2059 Length:2059 Length:2059 Min. :3099
## Class :character Class :character Class :character 1st Qu.:3985
## Mode :character Mode :character Mode :character Median :4370
## Mean :4281
## 3rd Qu.:4629
## Max. :5569
## NA's :64
## Width Height Seating.Capacity Fuel.Tank.Capacity
## Min. :1475 Min. :1165 Min. :2.000 Min. : 15.00
## 1st Qu.:1695 1st Qu.:1485 1st Qu.:5.000 1st Qu.: 41.25
## Median :1770 Median :1545 Median :5.000 Median : 50.00
## Mean :1768 Mean :1592 Mean :5.306 Mean : 52.00
## 3rd Qu.:1832 3rd Qu.:1675 3rd Qu.:5.000 3rd Qu.: 60.00
## Max. :2220 Max. :1995 Max. :8.000 Max. :105.00
## NA's :64 NA's :64 NA's :64 NA's :113
#checking missing value
colSums(is.na(data))
## Make Model Price Year
## 0 0 0 0
## Kilometer Fuel.Type Transmission Location
## 0 0 0 0
## Color Owner Seller.Type Engine
## 0 0 0 0
## Max.Power Max.Torque Drivetrain Length
## 0 0 0 64
## Width Height Seating.Capacity Fuel.Tank.Capacity
## 64 64 64 113
# filling the missing value using median
data$Length[is.na(data$Length)] <- median(data$Length, na.rm = TRUE)
data$Width[is.na(data$Width)] <- median(data$Width, na.rm = TRUE)
data$Height[is.na(data$Height)]<- median(data$Height, na.rm = TRUE)
data$Seating.Capacity[is.na(data$Seating.Capacity)]<-median(data$Seating.Capacity, na.rm = TRUE)
data$Fuel.Tank.Capacity[is.na(data$Fuel.Tank.Capacity)]<-median(data$Fuel.Tank.Capacity, na.rm = TRUE)
# After filling the missing value here i am checking again is na value present or not
colSums(is.na(data))
## Make Model Price Year
## 0 0 0 0
## Kilometer Fuel.Type Transmission Location
## 0 0 0 0
## Color Owner Seller.Type Engine
## 0 0 0 0
## Max.Power Max.Torque Drivetrain Length
## 0 0 0 0
## Width Height Seating.Capacity Fuel.Tank.Capacity
## 0 0 0 0
# checking duplicate
data<-data[!duplicated(data),]
#print(data)
str(data)
## 'data.frame': 2059 obs. of 20 variables:
## $ Make : chr "Honda" "Maruti Suzuki" "Hyundai" "Toyota" ...
## $ Model : chr "Amaze 1.2 VX i-VTEC" "Swift DZire VDI" "i10 Magna 1.2 Kappa2" "Glanza G" ...
## $ Price : int 505000 450000 220000 799000 1950000 675000 1898999 2650000 1390000 575000 ...
## $ Year : int 2017 2014 2011 2019 2018 2017 2015 2017 2017 2015 ...
## $ Kilometer : int 87150 75000 67000 37500 69000 73315 47000 75000 56000 85000 ...
## $ Fuel.Type : chr "Petrol" "Diesel" "Petrol" "Petrol" ...
## $ Transmission : chr "Manual" "Manual" "Manual" "Manual" ...
## $ Location : chr "Pune" "Ludhiana" "Lucknow" "Mangalore" ...
## $ Color : chr "Grey" "White" "Maroon" "Red" ...
## $ Owner : chr "First" "Second" "First" "First" ...
## $ Seller.Type : chr "Corporate" "Individual" "Individual" "Individual" ...
## $ Engine : chr "1198 cc" "1248 cc" "1197 cc" "1197 cc" ...
## $ Max.Power : chr "87 bhp @ 6000 rpm" "74 bhp @ 4000 rpm" "79 bhp @ 6000 rpm" "82 bhp @ 6000 rpm" ...
## $ Max.Torque : chr "109 Nm @ 4500 rpm" "190 Nm @ 2000 rpm" "112.7619 Nm @ 4000 rpm" "113 Nm @ 4200 rpm" ...
## $ Drivetrain : chr "FWD" "FWD" "FWD" "FWD" ...
## $ Length : int 3990 3995 3585 3995 4735 4490 4630 4439 4670 4331 ...
## $ Width : int 1680 1695 1595 1745 1830 1730 1777 1821 1814 1822 ...
## $ Height : int 1505 1555 1550 1510 1795 1485 1432 1612 1476 1671 ...
## $ Seating.Capacity : int 5 5 5 5 7 5 5 5 5 5 ...
## $ Fuel.Tank.Capacity: num 35 42 35 37 55 43 50 51 50 50 ...
# Conversion----------
data$Make<-as.factor(data$Make)
data$Model<-as.factor(data$Model)
data$Fuel.Type<-as.factor(data$Fuel.Type)
data$Transmission<-as.factor(data$Transmission)
data$Location<-as.factor(data$Location)
data$Owner<-as.factor(data$Owner)
data$Seller.Type<-as.factor(data$Seller.Type)
data$Drivetrain<-as.factor(data$Drivetrain)
str(data)
## 'data.frame': 2059 obs. of 20 variables:
## $ Make : Factor w/ 33 levels "Audi","BMW","Chevrolet",..: 8 18 9 31 31 18 20 2 28 24 ...
## $ Model : Factor w/ 1050 levels "2 Series Gran Coupe 220d M Sport [2020-2021]",..: 106 831 551 480 578 211 247 980 654 858 ...
## $ Price : int 505000 450000 220000 799000 1950000 675000 1898999 2650000 1390000 575000 ...
## $ Year : int 2017 2014 2011 2019 2018 2017 2015 2017 2017 2015 ...
## $ Kilometer : int 87150 75000 67000 37500 69000 73315 47000 75000 56000 85000 ...
## $ Fuel.Type : Factor w/ 9 levels "CNG","CNG + CNG",..: 7 3 7 7 3 7 7 3 7 3 ...
## $ Transmission : Factor w/ 2 levels "Automatic","Manual": 2 2 2 2 2 2 1 1 1 2 ...
## $ Location : Factor w/ 77 levels "Agra","Ahmedabad",..: 57 41 40 42 46 57 46 13 46 46 ...
## $ Color : chr "Grey" "White" "Maroon" "Red" ...
## $ Owner : Factor w/ 6 levels "4 or More","First",..: 2 4 2 2 2 2 4 4 2 2 ...
## $ Seller.Type : Factor w/ 3 levels "Commercial Registration",..: 2 3 3 3 3 3 3 3 3 3 ...
## $ Engine : chr "1198 cc" "1248 cc" "1197 cc" "1197 cc" ...
## $ Max.Power : chr "87 bhp @ 6000 rpm" "74 bhp @ 4000 rpm" "79 bhp @ 6000 rpm" "82 bhp @ 6000 rpm" ...
## $ Max.Torque : chr "109 Nm @ 4500 rpm" "190 Nm @ 2000 rpm" "112.7619 Nm @ 4000 rpm" "113 Nm @ 4200 rpm" ...
## $ Drivetrain : Factor w/ 4 levels "","AWD","FWD",..: 3 3 3 3 4 3 3 2 3 3 ...
## $ Length : int 3990 3995 3585 3995 4735 4490 4630 4439 4670 4331 ...
## $ Width : int 1680 1695 1595 1745 1830 1730 1777 1821 1814 1822 ...
## $ Height : int 1505 1555 1550 1510 1795 1485 1432 1612 1476 1671 ...
## $ Seating.Capacity : int 5 5 5 5 7 5 5 5 5 5 ...
## $ Fuel.Tank.Capacity: num 35 42 35 37 55 43 50 51 50 50 ...
Q1: What is the total number of bookings recorded in the dataset?
nrow(data)
## [1] 2059
Q2: How many unique car models are available in the dataset?
length(unique(data$Model))
## [1] 1050
Q3: Which car model is booked the most frequently?
model_count <- sort(table(data$Model), decreasing = TRUE)
head(model_count, 1)
##
## X1 sDrive20d xLine
## 15
Q4: Which are the top 5 most frequently booked cars?
head(sort(table(data$Model), decreasing = TRUE), 5)
##
## X1 sDrive20d xLine Swift DZire VDI
## 15 14
## City V Fortuner 2.8 4x2 AT [2016-2020]
## 13 13
## Swift DZire VXI
## 12
Q5: What is the average rental price of cars?
mean(data$Price, na.rm = TRUE)
## [1] 1702992
Q6: What is the highest rental price in the dataset?
max(data$Price, na.rm = TRUE)
## [1] 35000000
Q7: Which cars belong to the top 5 highest price category?
head(data[order(-data$Price), ], 5)
## Make Model Price Year Kilometer
## 484 Ferrari 488 GTB 35000000 2018 9500
## 1306 Land Rover Range Rover 3.0 V6 Diesel Vogue LWB 27500000 2020 11000
## 511 Lamborghini Huracan LP 610-4 24000000 2016 6000
## 583 Land Rover Range Rover 3.0 V6 Diesel Vogue 22000000 2019 35000
## 1247 Rolls-Royce Ghost Extended Wheelbase 20000000 2011 27000
## Fuel.Type Transmission Location Color Owner Seller.Type Engine
## 484 Petrol Automatic Delhi Black First Individual 3902 cc
## 1306 Diesel Automatic Hyderabad White Second Individual 2993 cc
## 511 Petrol Automatic Delhi Blue First Individual 5204 cc
## 583 Diesel Automatic Pune Blue First Individual 2993 cc
## 1247 Petrol Automatic Delhi Blue Third Individual 6592 cc
## Max.Power Max.Torque Drivetrain Length Width Height
## 484 660 bhp @ 8000 rpm 760 Nm @ 3000 rpm RWD 4568 1952 1213
## 1306 244 bhp @ 4000 rpm 600 Nm @ 2000 rpm AWD 5199 2220 1840
## 511 602 bhp @ 8250 rpm 560 Nm @ 6500 rpm AWD 4459 1924 1165
## 583 244 bhp @ 4000 rpm 600 Nm @ 2000 rpm AWD 4999 2220 1835
## 1247 570 bhp @ 5250 rpm 780 Nm @ 1500 rpm RWD 5569 1948 1550
## Seating.Capacity Fuel.Tank.Capacity
## 484 2 78.0
## 1306 4 105.0
## 511 2 50.0
## 583 5 105.0
## 1247 5 82.5
Q8: Which cars are in the lowest price category?
head(data[order(data$Price), ], 5)
## Make Model Price Year Kilometer Fuel.Type
## 492 Tata Nano Base 49000 2010 60000 Petrol
## 488 Maruti Suzuki Zen LXi BS-II 71001 2004 65082 Petrol
## 825 Honda City 1.5 EXi 100000 2002 65494 Petrol
## 460 Tata Manza Aqua Safire BS-IV 114999 2012 93000 Petrol
## 118 Hyundai Santro GL LPG 120000 2009 48500 LPG
## Transmission Location Color Owner Seller.Type Engine Max.Power
## 492 Manual Gurgaon White First Individual 624 cc 35@5250
## 488 Manual Kolkata Yellow First Individual
## 825 Manual Ranchi Beige First Individual
## 460 Manual Unnao Silver Second Individual 1368 cc 90@6000
## 118 Manual Lucknow Grey Fourth Individual 1086 cc 63@5500
## Max.Torque Drivetrain Length Width Height Seating.Capacity
## 492 48@3000 RWD 3099 1495 1652 4
## 488 4370 1770 1545 5
## 825 4370 1770 1545 5
## 460 116@4750 4413 1703 1550 5
## 118 89@3000 3565 1525 1590 5
## Fuel.Tank.Capacity
## 492 15
## 488 50
## 825 50
## 460 44
## 118 35
Q9: Which fuel type is most commonly used among cars?
sort(table(data$Fuel.Type), decreasing = TRUE)
##
## Diesel Petrol CNG Electric LPG Hybrid
## 1049 942 50 7 5 3
## CNG + CNG Petrol + CNG Petrol + LPG
## 1 1 1
Q10: What is the percentage distribution of each fuel type?
prop.table(table(data$Fuel.Type )) * 100
##
## CNG CNG + CNG Diesel Electric Hybrid LPG
## 2.42836328 0.04856727 50.94706168 0.33997086 0.14570180 0.24283633
## Petrol Petrol + CNG Petrol + LPG
## 45.75036425 0.04856727 0.04856727
Q11: Which fuel type has the highest average rental price?
aggregate(Price ~ Fuel.Type, data, mean)
## Fuel.Type Price
## 1 CNG 445380
## 2 CNG + CNG 249000
## 3 Diesel 2101848
## 4 Electric 1497857
## 5 Hybrid 7166667
## 6 LPG 210000
## 7 Petrol 1322041
## 8 Petrol + CNG 575000
## 9 Petrol + LPG 130000
ggplot(data, aes(x = Fuel.Type, y = Price, fill = Fuel.Type)) +
geom_bar(stat = "identity")+
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
Q12: How many cars are manual and how many are automatic?
table(data$Transmission)
##
## Automatic Manual
## 926 1133
Q13: Do automatic cars cost more than manual cars?
trans_price <- aggregate(Price ~ Transmission, data, mean)
ggplot(trans_price, aes(x = Transmission, y = Price, fill = Transmission)) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
Q14: What is the average price of automatic cars?
mean(data$Price[data$Transmission=="Automatic"], na.rm = TRUE)
## [1] 2979057
Q15: Which car has the highest mileage?
data[which.max(data$Kilometer), ]
## Make Model Price Year Kilometer Fuel.Type
## 1126 Renault Duster 110 PS RXZ 4X2 MT Diesel 450000 2016 2000000 Diesel
## Transmission Location Color Owner Seller.Type Engine Max.Power
## 1126 Manual Ranchi White First Individual 1461 cc 108 bhp @ 3900 rpm
## Max.Torque Drivetrain Length Width Height Seating.Capacity
## 1126 248 Nm @ 2250 rpm FWD 4315 1822 1695 5
## Fuel.Tank.Capacity
## 1126 50
Q16: How does mileage relate to rental price?
cor(data$Kilometer, data$Price, use="complete.obs")
## [1] -0.1508251
ggplot(data[data$Kilometer < 200000, ], aes(x = Kilometer, y = Price)) +
geom_point(alpha=0.5,color = "blue") +
geom_smooth(method = "lm", color = "red") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Q17: Which city has the highest number of bookings?
city_count <- table(data$Location)
city_count[which.max(city_count)]
## Mumbai
## 342
Q18: Which are the top 5 cities with highest bookings?
head(sort(table(data$Location), decreasing = TRUE), 5)
##
## Mumbai Delhi Pune Bangalore Hyderabad
## 342 307 144 132 116
Q20: Which city generates more total revenue?
city_rev <- aggregate(Price ~ Location, data, sum)
top5 <- city_rev[order(-city_rev$Price), ][1:5, ]
ggplot(top5, aes(x = Location, y = Price, fill = Location)) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
Q21: How many bookings generate above-average revenue?
avg_price <- mean(data$Price, na.rm = TRUE)
sum(data$Price > avg_price, na.rm = TRUE)
## [1] 584
Q22: What is the average manufacturing year of cars?
round(mean(data$Year, na.rm = TRUE))
## [1] 2016
Q23: Which is the oldest car in the dataset?
data[which.min(data$Year), ]
## Make Model Price Year Kilometer Fuel.Type
## 996 Mercedes-Benz E-Class E 200 Avantgarde 1300000 1988 71000 Petrol
## Transmission Location Color Owner Seller.Type Engine Max.Power
## 996 Automatic Dehradun Red Third Individual 1991 cc 181 bhp @ 5500 rpm
## Max.Torque Drivetrain Length Width Height Seating.Capacity
## 996 300 Nm @ 1200 rpm RWD 5063 1860 1494 5
## Fuel.Tank.Capacity
## 996 80
Q24: Do 5-seater cars cost more or 7-seater cars?
seat_price <- aggregate(Price ~ Seating.Capacity, data, mean)
ggplot(seat_price, aes(x = Seating.Capacity, y = Price, fill = Seating.Capacity)) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
#-----------------------------CA3-------------------------#
#---------------------------------------------------------#
Q-25:How does car age affect price?
data$Car_Age <- 2026 - data$'Year'
ggplot(data, aes(x = Car_Age, y = Price)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
## `geom_smooth()` using formula = 'y ~ x'
Q-26:How does the number of owners affect the car price?
data$Owner <- factor(data$Owner,
levels = c("First", "Second", "Third", "Fourth", "4 or More", "UnRegistered Car"))
ggplot(data, aes(x = Owner, y = Price)) +
geom_boxplot(fill = "skyblue") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
Q-27:How does fuel type affect car price?
fuel_price <- aggregate(Price ~ Fuel.Type, data, mean)
ggplot(fuel_price, aes(x = Fuel.Type, y = Price, fill = Fuel.Type)) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
Q-28:What is the average price of cars for each transmission type?
trans_price <- aggregate(Price ~ Transmission, data, mean)
print(trans_price)
## Transmission Price
## 1 Automatic 2979057.2
## 2 Manual 660064.4
Q-29: What is the average price of cars in each city?
city_avg_price <- aggregate(Price ~ Location, data, mean)
head(city_avg_price)
## Location Price
## 1 Agra 788894.7
## 2 Ahmedabad 1507257.1
## 3 Allahabad 525000.0
## 4 Ambala Cantt 418333.3
## 5 Amritsar 483750.0
## 6 Aurangabad 843750.0
Q-30:What is the average kilometer driven for each fuel type?
fuel_km <- aggregate(Kilometer ~ Fuel.Type, data, mean)
print(fuel_km)
## Fuel.Type Kilometer
## 1 CNG 51718.78
## 2 CNG + CNG 56843.00
## 3 Diesel 65094.08
## 4 Electric 19842.86
## 5 Hybrid 15000.00
## 6 LPG 55044.20
## 7 Petrol 42574.41
## 8 Petrol + CNG 58000.00
## 9 Petrol + LPG 100000.00
Q-31:What is the average price of cars based on number of owners?
owner_price <- aggregate(Price ~ Owner, data, mean)
owner_price$Owner <- factor(owner_price$Owner,
levels = c("First", "Second", "Third", "Fourth", "4 or More", "UnRegistered Car"))
ggplot(owner_price, aes(x = Owner, y = Price, fill = Owner)) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
Q-32:Which color cars are most common?
color_count <- table(data$Color)
print(color_count)
##
## Beige Black Blue Bronze Brown Gold Green Grey Maroon Orange Others
## 8 163 190 28 82 30 17 220 37 16 12
## Pink Purple Red Silver White Yellow
## 1 5 154 285 802 9
Q-33: How does torque affect car price?
data$Torque_Num <- as.numeric(gsub("[^0-9]", "", data$Max.Torque))
clean_data <- data[!is.na(data$Torque_Num) & !is.na(data$Price), ]
ggplot(clean_data, aes(x = Torque_Num, y = Price)) +
geom_point(alpha = 0.5, color = "blue") +
geom_smooth(method = "lm", color = "red") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Q-34:Which cars have width greater than average?
avg_width <- mean(data$Width)
wide_cars <- data[data$Width > avg_width, ]
print(avg_width)
## [1] 1768.054
head(wide_cars)
## Make Model Price Year Kilometer
## 5 Toyota Innova 2.4 VX 7 STR [2016-2020] 1950000 2018 69000
## 7 Mercedes-Benz CLA 200 Petrol Sport 1898999 2015 47000
## 8 BMW X1 xDrive20d M Sport 2650000 2017 75000
## 9 Skoda Octavia 1.8 TSI Style Plus AT [2017] 1390000 2017 56000
## 10 Nissan Terrano XL (D) 575000 2015 85000
## 14 Tata Harrier XZ [2019-2020] 1695000 2019 20856
## Fuel.Type Transmission Location Color Owner Seller.Type Engine
## 5 Diesel Manual Mumbai Grey First Individual 2393 cc
## 7 Petrol Automatic Mumbai White Second Individual 1991 cc
## 8 Diesel Automatic Coimbatore White Second Individual 1995 cc
## 9 Petrol Automatic Mumbai White First Individual 1798 cc
## 10 Diesel Manual Mumbai White First Individual 1461 cc
## 14 Diesel Manual Bangalore Orange First Individual 1956 cc
## Max.Power Max.Torque Drivetrain Length Width Height
## 5 148 bhp @ 3400 rpm 343 Nm @ 1400 rpm RWD 4735 1830 1795
## 7 181 bhp @ 5500 rpm 300 Nm @ 1200 rpm FWD 4630 1777 1432
## 8 188 bhp @ 4000 rpm 400 Nm @ 1750 rpm AWD 4439 1821 1612
## 9 177 bhp @ 5100 rpm 250 Nm @ 1250 rpm FWD 4670 1814 1476
## 10 84 bhp @ 3750 rpm 200 Nm @ 1900 rpm FWD 4331 1822 1671
## 14 138 bhp @ 3750 rpm 350 Nm @ 1750 rpm FWD 4598 1894 1706
## Seating.Capacity Fuel.Tank.Capacity Car_Age Torque_Num
## 5 7 55 8 3431400
## 7 5 50 11 3001200
## 8 5 51 9 4001750
## 9 5 50 9 2501250
## 10 5 50 11 2001900
## 14 5 50 7 3501750
Q-35:How can we predict rental price using mileage (Linear Regression)?
model1 <- lm(Price ~ Kilometer, data=data)
summary(model1)
##
## Call:
## lm(formula = Price ~ Kilometer, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1761299 -1202649 -837118 266676 33012436
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.048e+06 7.257e+04 28.22 < 2e-16 ***
## Kilometer -6.363e+00 9.195e-01 -6.92 6.02e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2393000 on 2057 degrees of freedom
## Multiple R-squared: 0.02275, Adjusted R-squared: 0.02227
## F-statistic: 47.88 on 1 and 2057 DF, p-value: 6.023e-12
ggplot(data, aes(Kilometer, Price)) +
geom_point(alpha=0.5) +
geom_smooth(method="lm", color="red") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Q-36:How can we predict price using multiple variables (Multiple Linear Regression)?
model2 <- lm(Price ~ Kilometer + Year + Engine, data = data)
data$Predicted_Price <- predict(model2)
ggplot(data, aes(x = Price, y = Predicted_Price)) +
geom_point(color = "blue", alpha = 0.5) +
geom_abline(slope = 1, intercept = 0, color = "red") +
scale_x_continuous(labels = function(x) format(x, scientific = FALSE)) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
Q-37:How does price change over years (Line Chart)?
year_price <- aggregate(Price ~ Year, data, mean)
ggplot(year_price, aes(Year, Price)) +
geom_line(color="blue") +
geom_point()
Q-38: What is the distribution of prices (Histogram)?
ggplot(data, aes(x = Price)) +
geom_histogram(fill = "blue", bins = 30) +
scale_x_continuous(labels = function(x) format(x, scientific = FALSE))
Q-39:How does engine size affect price?
ggplot(data, aes(x = Engine, y = Price)) +
geom_point() +
geom_smooth(method = "lm") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
## `geom_smooth()` using formula = 'y ~ x'
Q-40:How can we understand the relationship between car usage and rental price?
model <- lm(Price ~ Kilometer, data=data)
summary(model)
##
## Call:
## lm(formula = Price ~ Kilometer, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1761299 -1202649 -837118 266676 33012436
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.048e+06 7.257e+04 28.22 < 2e-16 ***
## Kilometer -6.363e+00 9.195e-01 -6.92 6.02e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2393000 on 2057 degrees of freedom
## Multiple R-squared: 0.02275, Adjusted R-squared: 0.02227
## F-statistic: 47.88 on 1 and 2057 DF, p-value: 6.023e-12
ggplot(data, aes(Kilometer, Price)) +
geom_point() +
geom_smooth(method="lm", color="red")
## `geom_smooth()` using formula = 'y ~ x'
Q-41:Is the relationship between mileage and price linear or non-linear?
model_lin <- lm(Price ~ Kilometer, data = data)
model_poly <- lm(Price ~ poly(Kilometer, 2), data = data)
ggplot(data, aes(x = Kilometer, y = Price)) +
geom_point(alpha = 0.5, color = "black") +
geom_smooth(method = "lm", color = "blue", se = FALSE) +
stat_smooth(method = "lm",
formula = y ~ poly(x, 2),
color = "red",
se = FALSE) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Q-42:How can we compare actual and predicted prices?
model <- lm(Price ~ Kilometer + Year + Engine, data=data)
data$Pred <- predict(model)
ggplot(data, aes(Price, Pred)) +
geom_point() +
geom_abline(color="red")
Q-43:How can we check whether newer cars are priced higher than older cars?
data$Age <- 2026 - data$Year
ggplot(data, aes(x = Age, y = Price)) +
geom_point(alpha = 0.5, color = "blue") +
geom_smooth(method = "lm", color = "red") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Q-44: How does price change when both mileage and engine size vary together?
ggplot(data, aes(x = Kilometer, y = Price)) +
geom_point(color = "blue", alpha = 0.5) +
geom_smooth(method = "lm", color = "red") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Q-45:How can we compare predicted price trends with actual data visually?
model <- lm(Price ~ Kilometer + Year, data=data)
data$Pred <- predict(model)
ggplot(data, aes(Kilometer)) +
geom_point(aes(y=Price), color="blue") +
geom_line(aes(y=Pred), color="red")
Q-46:How can we analyze whether price depends on a combination of features and their interaction?
model <- lm(Price ~ Kilometer * Engine, data=data)
summary(model)
##
## Call:
## lm(formula = Price ~ Kilometer * Engine, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4789623 -239866 -18459 155527 18487455
##
## Coefficients: (20 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.859e+06 2.585e+05 7.191 9.26e-13 ***
## Kilometer -9.148e+00 3.912e+00 -2.338 0.019477 *
## Engine1047 cc -1.721e+06 4.289e+06 -0.401 0.688356
## Engine1061 cc -1.708e+06 1.520e+06 -1.124 0.261296
## Engine1086 cc -1.415e+06 4.798e+05 -2.948 0.003235 **
## Engine1120 cc -1.387e+06 2.011e+06 -0.690 0.490529
## Engine1186 cc -1.448e+06 1.750e+06 -0.827 0.408109
## Engine1196 cc -1.257e+06 6.127e+05 -2.052 0.040342 *
## Engine1197 cc -1.150e+06 3.053e+05 -3.766 0.000171 ***
## Engine1198 cc -1.281e+06 6.033e+05 -2.124 0.033821 *
## Engine1199 cc -1.120e+06 4.068e+05 -2.754 0.005949 **
## Engine1248 cc -1.083e+06 3.801e+05 -2.851 0.004409 **
## Engine1298 cc -1.557e+06 1.563e+06 -0.996 0.319304
## Engine1332 cc 2.391e+06 4.027e+06 0.594 0.552760
## Engine1336 cc -1.335e+06 8.262e+06 -0.162 0.871683
## Engine1353 cc 2.606e+04 6.241e+05 0.042 0.966703
## Engine1364 cc -1.399e+06 3.053e+06 -0.458 0.646782
## Engine1368 cc 6.186e+05 7.649e+05 0.809 0.418787
## Engine1373 cc -1.080e+06 6.817e+05 -1.585 0.113231
## Engine1395 cc 1.722e+06 9.060e+05 1.901 0.057515 .
## Engine1396 cc -8.851e+05 8.282e+05 -1.069 0.285321
## Engine1399 cc -1.269e+06 4.431e+06 -0.286 0.774589
## Engine1451 cc -8.219e+04 1.066e+06 -0.077 0.938531
## Engine1461 cc -1.345e+06 3.485e+05 -3.861 0.000117 ***
## Engine1462 cc -7.609e+05 5.319e+05 -1.430 0.152756
## Engine1493 cc -9.948e+04 4.237e+05 -0.235 0.814378
## Engine1495 cc -1.081e+06 1.123e+06 -0.962 0.335954
## Engine1496 cc 1.271e+06 6.673e+05 1.904 0.057018 .
## Engine1497 cc -3.324e+05 3.676e+05 -0.904 0.365974
## Engine1498 cc -6.390e+05 4.202e+05 -1.521 0.128483
## Engine1499 cc -1.029e+06 1.411e+06 -0.729 0.466141
## Engine1582 cc -9.482e+05 3.169e+05 -2.992 0.002806 **
## Engine1586 cc -1.840e+06 2.783e+06 -0.661 0.508640
## Engine1591 cc -4.012e+05 5.525e+05 -0.726 0.467845
## Engine1595 cc -9.408e+05 2.254e+06 -0.417 0.676379
## Engine1598 cc -9.699e+05 5.850e+05 -1.658 0.097535 .
## Engine1794 cc -1.429e+06 3.136e+06 -0.456 0.648616
## Engine1796 cc 1.758e+06 2.670e+06 0.659 0.510290
## Engine1797 cc -1.208e+06 1.570e+06 -0.769 0.441775
## Engine1798 cc 9.391e+05 5.496e+05 1.709 0.087666 .
## Engine1799 cc -1.362e+06 3.700e+06 -0.368 0.712764
## Engine1948 cc -3.747e+05 1.134e+06 -0.330 0.741070
## Engine1950 cc 5.178e+06 3.781e+05 13.694 < 2e-16 ***
## Engine1956 cc 3.931e+05 4.762e+05 0.825 0.409277
## Engine1968 cc 9.500e+05 3.864e+05 2.459 0.014031 *
## Engine1969 cc 3.874e+06 7.687e+05 5.039 5.14e-07 ***
## Engine1984 cc 3.285e+06 4.433e+05 7.410 1.90e-13 ***
## Engine1988 cc 8.377e+06 1.329e+06 6.302 3.66e-10 ***
## Engine1991 cc 3.677e+06 4.622e+05 7.955 3.09e-15 ***
## Engine1995 cc 2.609e+06 3.491e+05 7.474 1.19e-13 ***
## Engine1996 cc 1.896e+06 9.444e+05 2.007 0.044867 *
## Engine1997 cc 1.297e+06 4.455e+05 2.911 0.003642 **
## Engine1998 cc 2.547e+06 4.144e+05 6.146 9.68e-10 ***
## Engine1999 cc 2.625e+06 5.290e+05 4.961 7.63e-07 ***
## Engine2143 cc 1.621e+06 4.598e+05 3.525 0.000434 ***
## Engine2148 cc 3.960e+06 9.092e+05 4.356 1.40e-05 ***
## Engine2157 cc 1.204e+06 1.126e+06 1.069 0.285207
## Engine2179 cc 4.182e+05 4.027e+05 1.039 0.299107
## Engine2184 cc -1.023e+05 7.653e+05 -0.134 0.893694
## Engine2198 cc 1.177e+06 1.124e+06 1.048 0.294897
## Engine2199 cc 3.401e+06 4.354e+06 0.781 0.434856
## Engine2354 cc -1.041e+06 3.294e+06 -0.316 0.751925
## Engine2393 cc 7.951e+05 5.191e+05 1.532 0.125719
## Engine2400 cc 8.138e+05 1.125e+06 0.724 0.469381
## Engine2477 cc -1.757e+06 3.912e+06 -0.449 0.653445
## Engine2487 cc 3.772e+06 2.621e+06 1.439 0.150219
## Engine2489 cc -9.972e+05 8.187e+06 -0.122 0.903066
## Engine2494 cc 2.953e+06 5.829e+05 5.066 4.47e-07 ***
## Engine2497 cc 3.314e+06 2.996e+07 0.111 0.911951
## Engine2498 cc -9.394e+05 1.604e+06 -0.586 0.558123
## Engine2523 cc -8.583e+05 1.126e+06 -0.762 0.446118
## Engine2609 cc -4.657e+05 1.130e+06 -0.412 0.680196
## Engine2694 cc 7.432e+05 1.305e+06 0.569 0.569233
## Engine2696 cc -4.012e+05 3.277e+06 -0.122 0.902580
## Engine2755 cc 1.105e+06 4.682e+05 2.360 0.018394 *
## Engine2835 cc -8.679e+05 1.124e+06 -0.772 0.440017
## Engine2925 cc 1.325e+07 1.036e+06 12.794 < 2e-16 ***
## Engine2953 cc -2.313e+05 1.127e+06 -0.205 0.837370
## Engine2967 cc 3.068e+06 5.008e+05 6.128 1.09e-09 ***
## Engine2979 cc 7.507e+06 1.124e+06 6.680 3.14e-11 ***
## Engine2982 cc -4.661e+05 7.369e+05 -0.633 0.527097
## Engine2987 cc 5.992e+06 6.696e+05 8.948 < 2e-16 ***
## Engine2993 cc 7.870e+06 4.367e+05 18.020 < 2e-16 ***
## Engine2995 cc 1.261e+07 7.203e+05 17.507 < 2e-16 ***
## Engine2996 cc 1.087e+07 7.820e+05 13.905 < 2e-16 ***
## Engine2997 cc 1.597e+06 3.248e+06 0.492 0.622985
## Engine2998 cc 1.218e+07 2.365e+06 5.150 2.87e-07 ***
## Engine2999 cc 1.342e+07 1.393e+06 9.636 < 2e-16 ***
## Engine3198 cc 2.497e+06 1.138e+06 2.194 0.028361 *
## Engine3436 cc 7.523e+06 1.138e+06 6.612 4.93e-11 ***
## Engine3496 cc 1.035e+06 1.123e+06 0.922 0.356618
## Engine3498 cc 4.272e+05 1.548e+06 0.276 0.782670
## Engine3902 cc 3.323e+07 1.137e+06 29.212 < 2e-16 ***
## Engine3982 cc 1.683e+07 1.131e+06 14.885 < 2e-16 ***
## Engine4163 cc 2.418e+06 2.264e+06 1.068 0.285525
## Engine4663 cc 1.542e+07 1.922e+06 8.020 1.85e-15 ***
## Engine4806 cc 3.105e+06 1.191e+06 2.608 0.009188 **
## Engine4951 cc 6.016e+06 1.893e+07 0.318 0.750724
## Engine5204 cc 2.220e+07 1.140e+06 19.473 < 2e-16 ***
## Engine5461 cc 1.937e+05 1.123e+06 0.172 0.863098
## Engine624 cc -1.261e+06 1.122e+06 -1.124 0.261061
## Engine6592 cc 1.937e+07 2.209e+06 8.767 < 2e-16 ***
## Engine793 cc -9.577e+05 1.122e+06 -0.854 0.393463
## Engine796 cc -1.502e+06 5.461e+05 -2.750 0.006020 **
## Engine799 cc -1.510e+06 7.869e+05 -1.919 0.055155 .
## Engine814 cc -1.584e+06 9.312e+05 -1.701 0.089074 .
## Engine936 cc -1.252e+06 1.124e+06 -1.114 0.265331
## Engine995 cc -1.065e+06 1.122e+06 -0.949 0.342627
## Engine998 cc -1.115e+06 3.362e+05 -3.317 0.000928 ***
## Engine999 cc -1.031e+06 4.326e+05 -2.383 0.017262 *
## Kilometer:Engine1047 cc 1.733e+01 1.163e+02 0.149 0.881526
## Kilometer:Engine1061 cc 9.463e+00 2.275e+01 0.416 0.677508
## Kilometer:Engine1086 cc 6.237e+00 8.246e+00 0.756 0.449540
## Kilometer:Engine1120 cc 8.174e+00 3.083e+01 0.265 0.790964
## Kilometer:Engine1186 cc 9.279e+00 2.766e+01 0.336 0.737282
## Kilometer:Engine1196 cc 4.684e+00 1.436e+01 0.326 0.744254
## Kilometer:Engine1197 cc 5.641e+00 5.147e+00 1.096 0.273280
## Kilometer:Engine1198 cc 6.921e+00 1.129e+01 0.613 0.539989
## Kilometer:Engine1199 cc 5.823e+00 7.432e+00 0.783 0.433438
## Kilometer:Engine1248 cc 6.955e+00 5.452e+00 1.276 0.202241
## Kilometer:Engine1298 cc 8.548e+00 1.981e+01 0.431 0.666168
## Kilometer:Engine1332 cc -7.519e+00 5.255e+02 -0.014 0.988585
## Kilometer:Engine1336 cc 7.346e+00 8.954e+01 0.082 0.934621
## Kilometer:Engine1353 cc 3.797e+00 2.158e+01 0.176 0.860341
## Kilometer:Engine1364 cc 9.372e+00 3.490e+01 0.269 0.788342
## Kilometer:Engine1368 cc -1.390e+01 1.419e+01 -0.980 0.327407
## Kilometer:Engine1373 cc 7.845e+00 1.291e+01 0.608 0.543420
## Kilometer:Engine1395 cc -7.796e+00 2.125e+01 -0.367 0.713771
## Kilometer:Engine1396 cc 4.566e+00 1.334e+01 0.342 0.732228
## Kilometer:Engine1399 cc 4.569e+00 5.592e+01 0.082 0.934897
## Kilometer:Engine1451 cc 1.132e+01 4.684e+01 0.242 0.809076
## Kilometer:Engine1461 cc 9.112e+00 3.956e+00 2.303 0.021371 *
## Kilometer:Engine1462 cc 6.265e+00 1.368e+01 0.458 0.647067
## Kilometer:Engine1493 cc -3.016e+00 7.327e+00 -0.412 0.680702
## Kilometer:Engine1495 cc NA NA NA NA
## Kilometer:Engine1496 cc -2.698e+01 1.377e+01 -1.960 0.050177 .
## Kilometer:Engine1497 cc -4.684e+00 5.991e+00 -0.782 0.434462
## Kilometer:Engine1498 cc 5.934e-01 6.168e+00 0.096 0.923377
## Kilometer:Engine1499 cc 3.232e+00 2.849e+01 0.113 0.909695
## Kilometer:Engine1582 cc 9.082e+00 4.089e+00 2.221 0.026469 *
## Kilometer:Engine1586 cc 1.174e+01 2.945e+01 0.399 0.690221
## Kilometer:Engine1591 cc -3.023e+00 1.149e+01 -0.263 0.792527
## Kilometer:Engine1595 cc 1.439e+01 3.782e+01 0.380 0.703692
## Kilometer:Engine1598 cc 5.439e+00 7.897e+00 0.689 0.491026
## Kilometer:Engine1794 cc 8.680e+00 3.544e+01 0.245 0.806560
## Kilometer:Engine1796 cc -2.704e+01 5.103e+01 -0.530 0.596264
## Kilometer:Engine1797 cc 8.718e+00 2.961e+01 0.294 0.768489
## Kilometer:Engine1798 cc -1.418e+01 9.983e+00 -1.421 0.155583
## Kilometer:Engine1799 cc 6.790e+00 3.402e+01 0.200 0.841853
## Kilometer:Engine1948 cc NA NA NA NA
## Kilometer:Engine1950 cc -3.364e+01 1.059e+01 -3.178 0.001507 **
## Kilometer:Engine1956 cc -1.392e+00 9.409e+00 -0.148 0.882391
## Kilometer:Engine1968 cc -2.404e+00 5.883e+00 -0.409 0.682870
## Kilometer:Engine1969 cc -3.197e+00 2.686e+01 -0.119 0.905294
## Kilometer:Engine1984 cc -3.950e+01 1.043e+01 -3.788 0.000157 ***
## Kilometer:Engine1988 cc -3.008e+01 1.022e+02 -0.294 0.768639
## Kilometer:Engine1991 cc -4.586e+01 8.090e+00 -5.669 1.66e-08 ***
## Kilometer:Engine1995 cc -1.839e+01 5.437e+00 -3.383 0.000732 ***
## Kilometer:Engine1996 cc 1.423e+01 3.456e+01 0.412 0.680626
## Kilometer:Engine1997 cc 1.486e+00 7.059e+00 0.211 0.833264
## Kilometer:Engine1998 cc -1.004e+01 5.954e+00 -1.687 0.091861 .
## Kilometer:Engine1999 cc -6.155e+00 1.097e+01 -0.561 0.574689
## Kilometer:Engine2143 cc -2.491e+00 7.000e+00 -0.356 0.722017
## Kilometer:Engine2148 cc -4.661e+01 1.619e+01 -2.880 0.004024 **
## Kilometer:Engine2157 cc NA NA NA NA
## Kilometer:Engine2179 cc -3.246e+00 5.250e+00 -0.618 0.536409
## Kilometer:Engine2184 cc 1.594e+01 3.412e+01 0.467 0.640552
## Kilometer:Engine2198 cc NA NA NA NA
## Kilometer:Engine2199 cc -3.835e+01 5.187e+01 -0.739 0.459840
## Kilometer:Engine2354 cc 4.391e+00 3.356e+01 0.131 0.895902
## Kilometer:Engine2393 cc 5.205e-01 8.139e+00 0.064 0.949017
## Kilometer:Engine2400 cc NA NA NA NA
## Kilometer:Engine2477 cc 2.408e+01 3.883e+01 0.620 0.535272
## Kilometer:Engine2487 cc -2.162e+01 1.213e+02 -0.178 0.858575
## Kilometer:Engine2489 cc 1.481e+00 1.052e+02 0.014 0.988764
## Kilometer:Engine2494 cc -2.069e+01 6.420e+00 -3.223 0.001292 **
## Kilometer:Engine2497 cc -4.460e+01 3.941e+02 -0.113 0.909907
## Kilometer:Engine2498 cc 6.706e+00 2.375e+01 0.282 0.777668
## Kilometer:Engine2523 cc 5.844e+00 1.762e+01 0.332 0.740242
## Kilometer:Engine2609 cc NA NA NA NA
## Kilometer:Engine2694 cc -5.777e+00 3.290e+01 -0.176 0.860635
## Kilometer:Engine2696 cc 1.804e+00 3.891e+01 0.046 0.963027
## Kilometer:Engine2755 cc 7.335e+00 6.120e+00 1.198 0.230897
## Kilometer:Engine2835 cc NA NA NA NA
## Kilometer:Engine2925 cc -1.950e+02 8.378e+01 -2.328 0.020022 *
## Kilometer:Engine2953 cc NA NA NA NA
## Kilometer:Engine2967 cc -6.122e+00 6.650e+00 -0.921 0.357407
## Kilometer:Engine2979 cc NA NA NA NA
## Kilometer:Engine2982 cc 9.362e+00 7.196e+00 1.301 0.193443
## Kilometer:Engine2987 cc -4.281e+01 8.771e+00 -4.881 1.15e-06 ***
## Kilometer:Engine2993 cc -5.600e+01 6.812e+00 -8.221 3.75e-16 ***
## Kilometer:Engine2995 cc -1.717e+02 2.074e+01 -8.279 2.35e-16 ***
## Kilometer:Engine2996 cc -9.314e+01 1.932e+01 -4.822 1.54e-06 ***
## Kilometer:Engine2997 cc -6.233e-01 4.329e+01 -0.014 0.988514
## Kilometer:Engine2998 cc -1.528e+02 1.502e+02 -1.017 0.309216
## Kilometer:Engine2999 cc -1.437e+02 1.960e+01 -7.334 3.32e-13 ***
## Kilometer:Engine3198 cc -1.599e+01 2.201e+01 -0.726 0.467740
## Kilometer:Engine3436 cc NA NA NA NA
## Kilometer:Engine3496 cc NA NA NA NA
## Kilometer:Engine3498 cc -4.438e+00 2.083e+01 -0.213 0.831279
## Kilometer:Engine3902 cc NA NA NA NA
## Kilometer:Engine3982 cc NA NA NA NA
## Kilometer:Engine4163 cc -1.544e+01 5.183e+01 -0.298 0.765792
## Kilometer:Engine4663 cc -2.047e+02 3.525e+01 -5.809 7.40e-09 ***
## Kilometer:Engine4806 cc NA NA NA NA
## Kilometer:Engine4951 cc 1.415e+01 7.882e+02 0.018 0.985681
## Kilometer:Engine5204 cc NA NA NA NA
## Kilometer:Engine5461 cc NA NA NA NA
## Kilometer:Engine624 cc NA NA NA NA
## Kilometer:Engine6592 cc -3.089e+01 4.449e+01 -0.694 0.487493
## Kilometer:Engine793 cc NA NA NA NA
## Kilometer:Engine796 cc 7.221e+00 1.021e+01 0.707 0.479623
## Kilometer:Engine799 cc 8.717e+00 1.611e+01 0.541 0.588499
## Kilometer:Engine814 cc 8.889e+00 1.656e+01 0.537 0.591513
## Kilometer:Engine936 cc NA NA NA NA
## Kilometer:Engine995 cc NA NA NA NA
## Kilometer:Engine998 cc 2.776e+00 5.902e+00 0.470 0.638208
## Kilometer:Engine999 cc 3.469e+00 9.529e+00 0.364 0.715886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1115000 on 1861 degrees of freedom
## Multiple R-squared: 0.8081, Adjusted R-squared: 0.7878
## F-statistic: 39.79 on 197 and 1861 DF, p-value: < 2.2e-16
ggplot(data, aes(x = Kilometer, y = Price)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red")
## `geom_smooth()` using formula = 'y ~ x'
Q-47:How can we visually check if residuals are randomly distributed?
model <- lm(Price ~ Kilometer, data=data)
ggplot(data, aes(Kilometer, residuals(model))) +
geom_point() +
geom_hline(yintercept=0, color="red")
Q-48:How are prices distributed across all cars?
ggplot(data, aes(x=Price)) +
geom_histogram(bins=30, fill="blue")+
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
scale_x_continuous(labels = function(x) format(x, scientific = FALSE))
theme_minimal()
## <theme> List of 144
## $ line : <ggplot2::element_line>
## ..@ colour : chr "black"
## ..@ linewidth : num 0.5
## ..@ linetype : num 1
## ..@ lineend : chr "butt"
## ..@ linejoin : chr "round"
## ..@ arrow : logi FALSE
## ..@ arrow.fill : chr "black"
## ..@ inherit.blank: logi TRUE
## $ rect : <ggplot2::element_rect>
## ..@ fill : chr "white"
## ..@ colour : chr "black"
## ..@ linewidth : num 0.5
## ..@ linetype : num 1
## ..@ linejoin : chr "round"
## ..@ inherit.blank: logi TRUE
## $ text : <ggplot2::element_text>
## ..@ family : chr ""
## ..@ face : chr "plain"
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : chr "black"
## ..@ size : num 11
## ..@ hjust : num 0.5
## ..@ vjust : num 0.5
## ..@ angle : num 0
## ..@ lineheight : num 0.9
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 0 0
## ..@ debug : logi FALSE
## ..@ inherit.blank: logi TRUE
## $ title : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : NULL
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ point : <ggplot2::element_point>
## ..@ colour : chr "black"
## ..@ shape : num 19
## ..@ size : num 1.5
## ..@ fill : chr "white"
## ..@ stroke : num 0.5
## ..@ inherit.blank: logi TRUE
## $ polygon : <ggplot2::element_polygon>
## ..@ fill : chr "white"
## ..@ colour : chr "black"
## ..@ linewidth : num 0.5
## ..@ linetype : num 1
## ..@ linejoin : chr "round"
## ..@ inherit.blank: logi TRUE
## $ geom : <ggplot2::element_geom>
## ..@ ink : chr "black"
## ..@ paper : chr "white"
## ..@ accent : chr "#3366FF"
## ..@ linewidth : num 0.5
## ..@ borderwidth: num 0.5
## ..@ linetype : int 1
## ..@ bordertype : int 1
## ..@ family : chr ""
## ..@ fontsize : num 3.87
## ..@ pointsize : num 1.5
## ..@ pointshape : num 19
## ..@ colour : NULL
## ..@ fill : NULL
## $ spacing : 'simpleUnit' num 5.5points
## ..- attr(*, "unit")= int 8
## $ margins : <ggplot2::margin> num [1:4] 5.5 5.5 5.5 5.5
## $ aspect.ratio : NULL
## $ axis.title : NULL
## $ axis.title.x : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 1
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 2.75 0 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.title.x.top : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 0
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 2.75 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.title.x.bottom : NULL
## $ axis.title.y : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 1
## ..@ angle : num 90
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 2.75 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.title.y.left : NULL
## $ axis.title.y.right : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 1
## ..@ angle : num -90
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 0 2.75
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : chr "#4D4D4DFF"
## ..@ size : 'rel' num 0.8
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : NULL
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.x : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : num 1
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 2.2 0 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.x.top : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 4.95 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.x.bottom : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 4.95 0 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.y : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : num 1
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 2.2 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.y.left : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 4.95 0 0
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.y.right : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 0 0 4.95
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.text.theta : NULL
## $ axis.text.r : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : num 0.5
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : <ggplot2::margin> num [1:4] 0 2.2 0 2.2
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ axis.ticks : <ggplot2::element_blank>
## $ axis.ticks.x : NULL
## $ axis.ticks.x.top : NULL
## $ axis.ticks.x.bottom : NULL
## $ axis.ticks.y : NULL
## $ axis.ticks.y.left : NULL
## $ axis.ticks.y.right : NULL
## $ axis.ticks.theta : NULL
## $ axis.ticks.r : NULL
## $ axis.minor.ticks.x.top : NULL
## $ axis.minor.ticks.x.bottom : NULL
## $ axis.minor.ticks.y.left : NULL
## $ axis.minor.ticks.y.right : NULL
## $ axis.minor.ticks.theta : NULL
## $ axis.minor.ticks.r : NULL
## $ axis.ticks.length : 'rel' num 0.5
## $ axis.ticks.length.x : NULL
## $ axis.ticks.length.x.top : NULL
## $ axis.ticks.length.x.bottom : NULL
## $ axis.ticks.length.y : NULL
## $ axis.ticks.length.y.left : NULL
## $ axis.ticks.length.y.right : NULL
## $ axis.ticks.length.theta : NULL
## $ axis.ticks.length.r : NULL
## $ axis.minor.ticks.length : 'rel' num 0.75
## $ axis.minor.ticks.length.x : NULL
## $ axis.minor.ticks.length.x.top : NULL
## $ axis.minor.ticks.length.x.bottom: NULL
## $ axis.minor.ticks.length.y : NULL
## $ axis.minor.ticks.length.y.left : NULL
## $ axis.minor.ticks.length.y.right : NULL
## $ axis.minor.ticks.length.theta : NULL
## $ axis.minor.ticks.length.r : NULL
## $ axis.line : <ggplot2::element_blank>
## $ axis.line.x : NULL
## $ axis.line.x.top : NULL
## $ axis.line.x.bottom : NULL
## $ axis.line.y : NULL
## $ axis.line.y.left : NULL
## $ axis.line.y.right : NULL
## $ axis.line.theta : NULL
## $ axis.line.r : NULL
## $ legend.background : <ggplot2::element_blank>
## $ legend.margin : NULL
## $ legend.spacing : 'rel' num 2
## $ legend.spacing.x : NULL
## $ legend.spacing.y : NULL
## $ legend.key : <ggplot2::element_blank>
## $ legend.key.size : 'simpleUnit' num 1.2lines
## ..- attr(*, "unit")= int 3
## $ legend.key.height : NULL
## $ legend.key.width : NULL
## $ legend.key.spacing : NULL
## $ legend.key.spacing.x : NULL
## $ legend.key.spacing.y : NULL
## $ legend.key.justification : NULL
## $ legend.frame : NULL
## $ legend.ticks : NULL
## $ legend.ticks.length : 'rel' num 0.2
## $ legend.axis.line : NULL
## $ legend.text : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : 'rel' num 0.8
## ..@ hjust : NULL
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : NULL
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ legend.text.position : NULL
## $ legend.title : <ggplot2::element_text>
## ..@ family : NULL
## ..@ face : NULL
## ..@ italic : chr NA
## ..@ fontweight : num NA
## ..@ fontwidth : num NA
## ..@ colour : NULL
## ..@ size : NULL
## ..@ hjust : num 0
## ..@ vjust : NULL
## ..@ angle : NULL
## ..@ lineheight : NULL
## ..@ margin : NULL
## ..@ debug : NULL
## ..@ inherit.blank: logi TRUE
## $ legend.title.position : NULL
## $ legend.position : chr "right"
## $ legend.position.inside : NULL
## $ legend.direction : NULL
## $ legend.byrow : NULL
## $ legend.justification : chr "center"
## $ legend.justification.top : NULL
## $ legend.justification.bottom : NULL
## $ legend.justification.left : NULL
## $ legend.justification.right : NULL
## $ legend.justification.inside : NULL
## [list output truncated]
## @ complete: logi TRUE
## @ validate: logi TRUE
Q-49:How does engine size and mileage together influence price?
ggplot(data, aes(x = Kilometer, y = Price)) +
geom_point(alpha = 0.5, color = "blue") +
geom_smooth(method = "lm", color = "red") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Q-50:How does model prediction vary across different years?
model <- lm(Price ~ Year, data=data)
data$Pred <- predict(model)
ggplot(data, aes(Year, Pred)) +
geom_smooth(method="loess", color="purple")
## `geom_smooth()` using formula = 'y ~ x'