- Sorting Cars by their Name (in descending order)
# Load the raw used-car listings from the local CSV export.
carsdata <- read.csv(
  file = "C:/Users/Vansh/Downloads/DataSet For CA-4/used_cars_data.csv"
)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Order the listings by Name, from Z to A.
sortdata <- arrange(carsdata, desc(Name))
- Stripping the unit suffixes from Mileage, Engine and Power, converting
them to numbers and removing the rows that contain NA values
library(dplyr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Clean the measurement columns: strip the unit suffix from Mileage
# (" kmpl", then " km/kg"), Engine (" CC") and Power (" bhp"), convert each
# column to numeric, then drop every row that contains an NA.
# A value that is still not numeric after its suffix is removed becomes NA
# (as.numeric() warns "NAs introduced by coercion") and its row is dropped.
Puredata <- sortdata %>%
  mutate(
    Mileage = Mileage %>%
      str_remove(" kmpl") %>%
      str_remove(" km/kg") %>%
      as.numeric(),
    Engine = as.numeric(str_remove(Engine, " CC")),
    Power = as.numeric(str_remove(Power, " bhp"))
  ) %>%
  # The piped data frame is the only input na.omit() needs. The earlier
  # version also passed `sortdata` here, which na.omit() silently ignored.
  na.omit()
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Power = as.numeric(str_remove(Power, " bhp"))`.
## Caused by warning:
## ! NAs introduced by coercion
- Average Mileage of Diesel and Petrol Cars on Sale
library(dplyr)
library(tidyverse)
# Mean mileage of the Diesel and Petrol listings, one row per fuel type.
Puredata %>%
  filter(Fuel_Type %in% c("Diesel", "Petrol")) %>%
  group_by(Fuel_Type) %>%
  summarise(`Average mileage of each car` = mean(as.numeric(Mileage)))
## # A tibble: 2 × 2
## Fuel_Type `Average mileage of each car`
## <chr> <dbl>
## 1 Diesel 18.7
## 2 Petrol 17.7
- Count of cars that are being sold in each city
library(dplyr)
library(tidyverse)
# Number of listings in each city.
Puredata %>%
  group_by(Location) %>%
  summarise(count = n())
## # A tibble: 11 × 2
## Location count
## <chr> <int>
## 1 Ahmedabad 220
## 2 Bangalore 347
## 3 Chennai 476
## 4 Coimbatore 629
## 5 Delhi 545
## 6 Hyderabad 718
## 7 Jaipur 402
## 8 Kochi 645
## 9 Kolkata 521
## 10 Mumbai 775
## 11 Pune 594
- Standard deviation of the kilometers driven, i.e. how far a car's
distance typically deviates from the average distance
# Standard deviation of the kilometers driven across all cleaned listings.
sd(Puredata[["Kilometers_Driven"]])
## [1] 92169.41
- Bar graph depicting the number of cars that are on sale by their
city along with their Fuel Type
library(ggplot2)
library(tidyverse)
# Horizontal stacked bars: listings per city, split by fuel type.
ggplot(Puredata, aes(y = Location, fill = Fuel_Type)) +
  geom_bar() +
  labs(title = "Cars by City", x = "Count", y = "Cities")

- Histogram depicting the Year (launch year) of the cars that are on
sale
library(ggplot2)
library(tidyverse)
# Distribution of the listings' Year values, grouped into 2-year bins.
ggplot(Puredata, aes(x = Year)) +
  geom_histogram(binwidth = 2, color = "black", fill = "blue") +
  labs(
    title = "Frequency of cars launched,between 1990 and 2020 ",
    y = "Frequency "
  )

- Bar graph depicting cars sold as per their Companies
library(ggplot2)
library(tidyverse)
# Reduce each car name to its manufacturer, i.e. the first word of Name.
# "Isuzu" is rewritten to "ISUZU" first so that spelling is grouped under
# the single label "ISUZU".
CarCom <- Puredata %>%
  mutate(Name = str_replace(Name, "Isuzu", "ISUZU")) %>%
  mutate(Name = word(Name, 1))

# Listings per manufacturer. The bar colour is a fixed value, so it is set on
# the geom. Inside aes() the string "orange" is treated as data, which draws
# the bars in the default palette colour and adds a meaningless legend.
ggplot(data = CarCom, aes(y = Name)) +
  geom_bar(fill = "orange") +
  labs(title = "Cars being Sold as per company")

- Average resale value of each car company
# Mean Price per manufacturer, kept as a plain data frame and drawn as
# horizontal bars.
avgprice <- CarCom %>%
  group_by(Name) %>%
  summarise(Average_price = mean(Price)) %>%
  as.data.frame()
ggplot(avgprice, aes(x = Average_price, y = Name)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(title = "Average resale value of each car Company")

- Bar graph depicting luxury cars on sale as per the city
# Keep only the manufacturers treated as luxury brands. Name holds the first
# word of the car name, so "Land" is presumably Land Rover (confirm against
# the data). The brand is spelled "Mercedes-Benz" in the data; the earlier
# "Mercedez-Benz" never matched, so those cars were silently left out.
luxuryCars <- CarCom %>%
  filter(
    Name %in% c(
      "Volvo", "Porsche", "Mercedes-Benz", "Land", "Lamborghini",
      "Jaguar", "Bentley", "Audi"
    )
  )

# Luxury listings per city, stacked by manufacturer. The y axis shows
# Location, so it is labelled as the city; the fill legend is the company.
ggplot(data = luxuryCars, aes(y = Location, fill = Name)) +
  geom_bar() +
  labs(
    title = "Luxury Cars being sold as per city",
    y = "City",
    fill = "Company"
  )

- Correlation Between the engine’s displacement and Mileage
# Pearson correlation between Mileage and Engine.
cor(Puredata[["Mileage"]], Puredata[["Engine"]], method = "pearson")
## [1] -0.6372576
- Correlation between the launch year of the car and its resale
value
# Correlation between Year and Price (cor() uses its default method).
with(Puredata, cor(Year, Price))
## [1] 0.2994755
- A corrplot of relation between the Engine CC, Horsepower and the
Mileage
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.92 loaded
# Correlation matrix of Mileage, Engine and Power, drawn with shaded cells.
cor_mat <- select(Puredata, Mileage, Engine, Power)
cars_cor_plot <- cor(cor_mat)
corrplot(cars_cor_plot, method = "shade")

- Corrplot of correlation between the launch year, kilometers driven
and the resale price
library(corrplot)
# Correlation matrix of Kilometers_Driven, Year and Price, drawn with
# corrplot's default method.
cor_akt <- select(Puredata, Kilometers_Driven, Year, Price)
cars_cor_plot2 <- cor(cor_akt)
corrplot(cars_cor_plot2)

- Box-plot depicting the mileage as per the fuel type
# One box of Mileage per fuel type, each drawn in its own colour.
boxplot(
  Mileage ~ Fuel_Type,
  data = Puredata,
  col = c("red", "green", "blue", "yellow"),
  main = "Boxplot representing Mileage of Cars as per their fuel type",
  xlab = "Fuel Type",
  ylab = "Mileage"
)

- A linear model depicting the price of the car based upon the
Manufacturer
library(ggeffects)
## Warning: package 'ggeffects' was built under R version 4.3.3
# Fit Price on manufacturer (Name in CarCom) and plot the predicted Price
# for each manufacturer.
model <- lm(Price ~ Name, data = CarCom)
predict2 <- ggpredict(model, terms = "Name")
plot(predict2)

# Predict the Price of a single manufacturer ("Tata") from the fitted model,
# together with its 95% confidence interval (columns fit, lwr, upr).
# ggpredict() has no `new_data` argument, so the earlier call ignored `pred`
# and returned predictions for every manufacturer. predict() takes the new
# observation through `newdata`.
pred <- data.frame(Name = "Tata")
predicted <- predict(model, newdata = pred, interval = "confidence")
print(predicted)
## $Name
## # Predicted values of Price
##
## Name | Predicted | 95% CI
## ------------------------------------------
## Volvo | 18.80 | 15.73, 21.88
## Volkswagen | 5.31 | 4.51, 6.10
## Toyota | 11.91 | 11.20, 12.62
## Tata | 3.61 | 2.56, 4.65
## Skoda | 7.59 | 6.51, 8.66
## Renault | 5.80 | 4.63, 6.97
## Porsche | 49.20 | 45.68, 52.73
## Nissan | 4.78 | 3.29, 6.28
## Mitsubishi | 11.06 | 8.35, 13.77
## Mini | 26.90 | 24.13, 29.66
## Mercedes-Benz | 26.92 | 26.13, 27.71
## Maruti | 4.55 | 4.14, 4.97
## Mahindra | 8.06 | 7.20, 8.92
## Land | 39.86 | 38.00, 41.73
## Lamborghini | 120.00 | 105.92, 134.08
## Jeep | 18.72 | 15.08, 22.36
## Jaguar | 37.63 | 35.41, 39.86
## ISUZU | 14.70 | 6.57, 22.83
## Hyundai | 5.51 | 5.08, 5.95
## Honda | 5.42 | 4.85, 6.00
## Ford | 6.96 | 6.14, 7.78
## Force | 9.33 | 1.20, 17.46
## Fiat | 3.57 | 0.63, 6.51
## Datsun | 3.05 | -0.86, 6.96
## Chevrolet | 3.06 | 1.77, 4.34
## Bentley | 59.00 | 44.92, 73.08
## BMW | 25.44 | 24.57, 26.31
## Audi | 25.57 | 24.65, 26.49
## Ambassador | 1.35 | -12.73, 15.43
##
##
## attr(,"class")
## [1] "ggalleffects" "list"
## attr(,"model.name")
## [1] "model"
- Pie chart showing the share of each transmission type among the cars
that are on sale
# Share of each transmission type among the listings, shown as a pie chart.
trantab <- table(Puredata$Transmission)
# Take the labels from the table itself so every slice is paired with its own
# category, instead of relying on a hand-written list being in table order.
Tran_type <- names(trantab)
# Percentage of listings per transmission type, rounded to 2 decimals.
per_tran <- round(trantab / sum(trantab) * 100, 2)
pie(trantab, labels = paste0(Tran_type, " ", per_tran, "%"))

- Summary of the Whole Dataset
# Per-column summary of the cleaned dataset.
summary(object = Puredata)
## S.No. Name Location Year
## Min. : 0 Length:5872 Length:5872 Min. :1998
## 1st Qu.:1510 Class :character Class :character 1st Qu.:2012
## Median :3016 Mode :character Mode :character Median :2014
## Mean :3013 Mean :2013
## 3rd Qu.:4516 3rd Qu.:2016
## Max. :6018 Max. :2019
## Kilometers_Driven Fuel_Type Transmission Owner_Type
## Min. : 171 Length:5872 Length:5872 Length:5872
## 1st Qu.: 33422 Class :character Class :character Class :character
## Median : 52609 Mode :character Mode :character Mode :character
## Mean : 58317
## 3rd Qu.: 72403
## Max. :6500000
## Mileage Engine Power Seats
## Min. : 0.00 Min. : 624 Min. : 34.2 Min. : 2.000
## 1st Qu.:15.26 1st Qu.:1198 1st Qu.: 75.0 1st Qu.: 5.000
## Median :18.20 Median :1496 Median : 97.7 Median : 5.000
## Mean :18.28 Mean :1626 Mean :113.3 Mean : 5.284
## 3rd Qu.:21.10 3rd Qu.:1991 3rd Qu.:138.1 3rd Qu.: 5.000
## Max. :33.54 Max. :5998 Max. :560.0 Max. :10.000
## New_Price Price
## Length:5872 Min. : 0.440
## Class :character 1st Qu.: 3.518
## Mode :character Median : 5.750
## Mean : 9.604
## 3rd Qu.: 10.000
## Max. :160.000