1. Sorting cars by their name (descending order)
# Load the used-cars listings from the local CSV export into a data frame.
# NOTE(review): the path is machine-specific; this only runs where the file exists.
cars_csv <- "C:/Users/Vansh/Downloads/DataSet For CA-4/used_cars_data.csv"
carsdata <- read.csv(file = cars_csv)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Order the listings by the Name column, descending.
sortdata <- arrange(carsdata, desc(Name))
  1. Removing NA values and strings from the dataset and converting strings to numbers
library(dplyr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Clean the unit-suffixed text columns and keep only complete rows:
#   * Mileage: strip the " kmpl" / " km/kg" suffix, then convert to numeric.
#   * Engine:  strip the " CC" suffix, then convert to numeric.
#   * Power:   strip the " bhp" suffix, then convert to numeric.
# Values that are still not numeric after stripping become NA (as.numeric()
# emits a coercion warning); na.omit() then drops every row containing an NA.
# na.omit() receives the piped data frame, so it needs no further argument.
Puredata <- sortdata %>%
  mutate(
    Mileage = as.numeric(str_remove(str_remove(Mileage, " kmpl"), " km/kg")),
    Engine = as.numeric(str_remove(Engine, " CC")),
    Power = as.numeric(str_remove(Power, " bhp"))
  ) %>%
  na.omit()
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Power = as.numeric(str_remove(Power, " bhp"))`.
## Caused by warning:
## ! NAs introduced by coercion
  1. Average Mileage of Diesel and Petrol Cars on Sale
library(dplyr)
library(tidyverse)
# Mean mileage per fuel type, restricted to Diesel and Petrol listings.
Puredata %>%
  filter(Fuel_Type %in% c("Diesel", "Petrol")) %>%
  group_by(Fuel_Type) %>%
  summarise(`Average mileage of each car` = mean(as.numeric(Mileage)))
## # A tibble: 2 × 2
##   Fuel_Type `Average mileage of each car`
##   <chr>                             <dbl>
## 1 Diesel                             18.7
## 2 Petrol                             17.7
  1. Count of cars that are being sold in each city
library(dplyr)
library(tidyverse)
# Number of listings in each Location.
summarise(group_by(Puredata, Location), count = n())
## # A tibble: 11 × 2
##    Location   count
##    <chr>      <int>
##  1 Ahmedabad    220
##  2 Bangalore    347
##  3 Chennai      476
##  4 Coimbatore   629
##  5 Delhi        545
##  6 Hyderabad    718
##  7 Jaipur       402
##  8 Kochi        645
##  9 Kolkata      521
## 10 Mumbai       775
## 11 Pune         594
  1. Standard deviation of the kilometers driven, i.e. how much each car's distance deviates from the average distance driven
# Standard deviation of the Kilometers_Driven column.
sd(Puredata[["Kilometers_Driven"]])
## [1] 92169.41
  1. Bar graph depicting the number of cars that are on sale by their city along with their Fuel Type
library(ggplot2)
library(tidyverse)
# Horizontal bar chart: one bar per Location, filled by Fuel_Type.
ggplot(Puredata, aes(y = Location, fill = Fuel_Type)) +
  geom_bar() +
  labs(title = "Cars by City", x = "Count", y = "Cities")

  1. Histogram depicting the age (launch year) of the cars that are on sale
library(ggplot2)
library(tidyverse)
# Histogram of the Year column using bins two years wide.
ggplot(Puredata, aes(x = Year)) +
  geom_histogram(binwidth = 2, fill = "blue", color = "black") +
  labs(
    title = "Frequency of cars launched,between 1990 and 2020 ",
    y = "Frequency "
  )

  1. Bar graph depicting cars sold as per their Companies
library(ggplot2)
library(tidyverse)
# Reduce each listing's Name to its first word (the manufacturer), after
# unifying the "Isuzu" spelling to "ISUZU" so both forms share one group.
CarCom <- Puredata %>%
  mutate(Name = str_replace(Name, "Isuzu", "ISUZU")) %>%
  mutate(Name = word(Name, 1))

# Count of listings per manufacturer. The bar colour is a constant, so it is
# set on the geom: fill = "orange" inside aes() is treated as data, which draws
# the default palette colour and adds a meaningless legend instead of orange bars.
ggplot(data = CarCom, aes(y = Name)) +
  geom_bar(fill = "orange") +
  labs(title = "Cars being Sold as per company")

  1. Average resale value of each car company
# Mean Price per manufacturer, kept as a plain data frame.
avgprice <- as.data.frame(
  summarise(group_by(CarCom, Name), Average_price = mean(Price))
)

# Horizontal bars whose length is the pre-computed average price.
ggplot(avgprice, aes(y = Name, x = Average_price)) +
  geom_bar(fill = "skyblue", stat = "identity") +
  labs(title = "Average resale value of each car Company")

  1. Bar graph depicting luxury cars on sale as per the city
# Manufacturers treated as luxury brands. CarCom$Name holds only the first
# word of the listing name, so brands are matched by that word (e.g. "Land").
# The data spells the brand "Mercedes-Benz"; "Mercedez-Benz" matches no rows.
luxury_brands <- c(
  "Volvo", "Porsche", "Mercedes-Benz", "Land",
  "Lamborghini", "Jaguar", "Bentley", "Audi"
)
luxuryCars <- CarCom %>% filter(Name %in% luxury_brands)

# One bar per city, filled by manufacturer. The y axis shows Location, so it is
# labelled as cities rather than companies.
ggplot(data = luxuryCars, aes(y = Location, fill = Name)) +
  geom_bar() +
  labs(title = "Luxury Cars being sold as per city", y = "Cities")

  1. Correlation Between the engine’s displacement and Mileage
# Pearson correlation between the Mileage and Engine columns.
with(Puredata, cor(Mileage, Engine, method = "pearson"))
## [1] -0.6372576
  1. Correlation between the launch year of the car and its resale value
# Correlation between the Year and Price columns (cor() default method).
with(Puredata, cor(Year, Price))
## [1] 0.2994755
  1. A corrplot of relation between the Engine CC, Horsepower and the Mileage
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.92 loaded
# Pairwise correlations of Mileage, Engine and Power, drawn as a shaded corrplot.
cor_mat <- select(Puredata, Mileage, Engine, Power)
cars_cor_plot <- cor(cor_mat)
corrplot(cars_cor_plot, method = "shade")

  1. Corrplot of correlation between the launch year, kilometers driven and the resale price
library(corrplot)
# Pairwise correlations of Kilometers_Driven, Year and Price, drawn with
# corrplot's default method.
cor_akt <- select(Puredata, Kilometers_Driven, Year, Price)
cars_cor_plot2 <- cor(cor_akt)
corrplot(cars_cor_plot2)

  1. Box-plot depicting the mileage as per the fuel type
# One box of Mileage values per Fuel_Type, using the cleaned listings.
boxplot(
  Mileage ~ Fuel_Type,
  data = Puredata,
  col = c("red", "green", "blue", "yellow"),
  xlab = "Fuel Type",
  ylab = "Mileage",
  main = "Boxplot representing Mileage of Cars as per their fuel type"
)

  1. A linear model depicting the price of the car based upon the Manufacturer
library(ggeffects)
## Warning: package 'ggeffects' was built under R version 4.3.3
# Linear model of Price with the manufacturer (CarCom$Name) as the only predictor.
model <- lm(Price ~ Name, data = CarCom)

# Predicted Price for each manufacturer, then plotted.
predict2 <- ggpredict(model, terms = "Name")
plot(predict2)

# Predict the price for a single manufacturer. ggpredict() has no `new_data`
# argument, so passing `pred` that way is silently ignored and predictions for
# every manufacturer come back; predict() scores exactly the rows in `pred`.
pred <- data.frame(Name = "Tata")
# interval = "confidence" also returns the lower/upper confidence bounds
# (95% level by default) next to the fitted value.
predicted <- predict(model, newdata = pred, interval = "confidence")
print(predicted)
## $Name
## # Predicted values of Price
## 
## Name          | Predicted |         95% CI
## ------------------------------------------
## Volvo         |     18.80 |  15.73,  21.88
## Volkswagen    |      5.31 |   4.51,   6.10
## Toyota        |     11.91 |  11.20,  12.62
## Tata          |      3.61 |   2.56,   4.65
## Skoda         |      7.59 |   6.51,   8.66
## Renault       |      5.80 |   4.63,   6.97
## Porsche       |     49.20 |  45.68,  52.73
## Nissan        |      4.78 |   3.29,   6.28
## Mitsubishi    |     11.06 |   8.35,  13.77
## Mini          |     26.90 |  24.13,  29.66
## Mercedes-Benz |     26.92 |  26.13,  27.71
## Maruti        |      4.55 |   4.14,   4.97
## Mahindra      |      8.06 |   7.20,   8.92
## Land          |     39.86 |  38.00,  41.73
## Lamborghini   |    120.00 | 105.92, 134.08
## Jeep          |     18.72 |  15.08,  22.36
## Jaguar        |     37.63 |  35.41,  39.86
## ISUZU         |     14.70 |   6.57,  22.83
## Hyundai       |      5.51 |   5.08,   5.95
## Honda         |      5.42 |   4.85,   6.00
## Ford          |      6.96 |   6.14,   7.78
## Force         |      9.33 |   1.20,  17.46
## Fiat          |      3.57 |   0.63,   6.51
## Datsun        |      3.05 |  -0.86,   6.96
## Chevrolet     |      3.06 |   1.77,   4.34
## Bentley       |     59.00 |  44.92,  73.08
## BMW           |     25.44 |  24.57,  26.31
## Audi          |     25.57 |  24.65,  26.49
## Ambassador    |      1.35 | -12.73,  15.43
## 
## 
## attr(,"class")
## [1] "ggalleffects" "list"        
## attr(,"model.name")
## [1] "model"
  1. Pie chart showing the share of each transmission type among the cars that are on sale
# Share of each transmission type among the cleaned listings.
trantab <- table(Puredata$Transmission)
# Take the labels from the table itself so every label is guaranteed to match
# its slice, instead of relying on a hand-written vector in the same order.
Tran_type <- names(trantab)
# Percentage of listings per transmission type, rounded to two decimals.
per_tran <- round(trantab / sum(trantab) * 100, 2)
pie(trantab, labels = paste0(Tran_type, " ", per_tran, "%"))

  1. Summary of the Whole Dataset
# Per-column summary of the cleaned data set.
summary(object = Puredata)
##      S.No.          Name             Location              Year     
##  Min.   :   0   Length:5872        Length:5872        Min.   :1998  
##  1st Qu.:1510   Class :character   Class :character   1st Qu.:2012  
##  Median :3016   Mode  :character   Mode  :character   Median :2014  
##  Mean   :3013                                         Mean   :2013  
##  3rd Qu.:4516                                         3rd Qu.:2016  
##  Max.   :6018                                         Max.   :2019  
##  Kilometers_Driven  Fuel_Type         Transmission        Owner_Type       
##  Min.   :    171   Length:5872        Length:5872        Length:5872       
##  1st Qu.:  33422   Class :character   Class :character   Class :character  
##  Median :  52609   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :  58317                                                           
##  3rd Qu.:  72403                                                           
##  Max.   :6500000                                                           
##     Mileage          Engine         Power           Seats       
##  Min.   : 0.00   Min.   : 624   Min.   : 34.2   Min.   : 2.000  
##  1st Qu.:15.26   1st Qu.:1198   1st Qu.: 75.0   1st Qu.: 5.000  
##  Median :18.20   Median :1496   Median : 97.7   Median : 5.000  
##  Mean   :18.28   Mean   :1626   Mean   :113.3   Mean   : 5.284  
##  3rd Qu.:21.10   3rd Qu.:1991   3rd Qu.:138.1   3rd Qu.: 5.000  
##  Max.   :33.54   Max.   :5998   Max.   :560.0   Max.   :10.000  
##   New_Price             Price        
##  Length:5872        Min.   :  0.440  
##  Class :character   1st Qu.:  3.518  
##  Mode  :character   Median :  5.750  
##                     Mean   :  9.604  
##                     3rd Qu.: 10.000  
##                     Max.   :160.000