# Load the OLX car listings.
# Text columns are read as factors explicitly: read.csv() stopped converting
# strings to factors by default in R 4.0.0, and the categorical columns
# (Condition, Fuel, Transaction.Type, ...) are treated as factors by this
# script.
cars.df <- read.csv("OLX_Car_Data_CSV (2).csv", stringsAsFactors = TRUE)
# Attach the data frame so its columns can be referenced by bare name.
# NOTE(review): attach() is generally discouraged; it is kept here because
# other statements in this script may refer to columns without the
# cars.df$ prefix.
attach(cars.df)
# Display the column names
colnames(cars.df)
## [1] "Brand" "Condition" "Fuel"
## [4] "KMs.Driven" "Model" "Price"
## [7] "Registered.City" "Transaction.Type" "Year"
## [10] "Car.Segment"
# Display the data dimensions (rows, columns)
dim(cars.df)
## [1] 15312 10
# Cross-tabulate car condition (rows) against car segment (columns)
counts <- with(cars.df, table(Condition, Car.Segment))
# Grouped bar plot: one cluster of bars per segment, one bar per condition.
# Bar heights are counts, so the y-axis is labelled "Count"; the legend
# (taken from the row names of the table) shows which colour is which
# condition.
barplot(counts,
  beside = TRUE,
  col = c("white", "black"),
  legend.text = TRUE,
  xlab = "Car Segment", ylab = "Count",
  main = "Grouped Bar Plot"
)
# Cross-tabulate car condition (rows) against car segment (columns)
counts <- with(cars.df, table(Condition, Car.Segment))
# Stacked bar plot: one bar per segment, split by condition.
# Bar heights are counts, so the y-axis is labelled "Count"; the legend
# (taken from the row names of the table) shows which colour is which
# condition.
barplot(counts,
  col = c("white", "black"),
  legend.text = TRUE,
  xlab = "Car Segment", ylab = "Count",
  main = "Stacked Bar Plot"
)
# Percentage of cars by condition, rounded to 2 decimal places
round(prop.table(table(Condition = cars.df$Condition)) * 100, 2)
## Condition
## New Used
## 19.85 80.15
# Bar chart of the number of cars per condition.
# barplot(table(...)) draws the chart whether the column is a factor or a
# character vector; the x-axis is labelled with the variable actually shown.
barplot(table(cars.df$Condition), xlab = "Condition", ylab = "Count")
# Percentage of cars by fuel type, rounded to 2 decimal places
round(prop.table(table(Fuel = cars.df$Fuel)) * 100, 2)
## Fuel
## CNG Diesel Hybrid LPG Petrol
## 23.79 1.32 3.58 0.04 71.28
# Bar chart of the number of cars per fuel type.
# barplot(table(...)) draws the chart whether the column is a factor or a
# character vector.
barplot(table(cars.df$Fuel), xlab = "Fuel", ylab = "Count")
# Percentage of cars by transaction type, rounded to 2 decimal places
round(
  prop.table(table(Transaction.Type = cars.df$Transaction.Type)) * 100,
  2
)
## Transaction.Type
## Cash Installment/Leasing
## 94.18 5.82
# Bar chart of the number of cars per transaction type.
# barplot(table(...)) draws the chart whether the column is a factor or a
# character vector.
barplot(table(cars.df$Transaction.Type),
  xlab = "Transaction Type", ylab = "Count"
)
# Pie chart of fuel type
# Count the cars for each fuel type
tab <- table(Fuel = cars.df$Fuel)
# Save the table as a data frame (columns: Fuel, Freq)
tab.df <- as.data.frame(tab)
# Slice sizes: one count per fuel type
x <- tab.df$Freq
# Slice labels are taken from the table itself so they always line up with
# the counts, instead of being typed out by hand
labels <- names(tab)
# Plot the pie chart, titled after the variable being shown
pie(x, labels, main = "Pie Chart of Fuel Type")
# 3D pie chart for fuel type
# Count the cars for each fuel type
tab <- table(Fuel = cars.df$Fuel)
# Save the table as a data frame (columns: Fuel, Freq)
tab.df <- as.data.frame(tab)
# Slice sizes: one count per fuel type
x <- tab.df$Freq
# Slice labels are taken from the table itself so they always line up with
# the counts, instead of being typed out by hand
labels <- names(tab)
# plotrix provides pie3D()
library(plotrix)
## Warning: package 'plotrix' was built under R version 3.5.3
# Plot the chart with the slices pulled apart slightly (explode = 0.1),
# titled after the variable being shown
pie3D(x,
  labels = labels, explode = 0.1,
  main = "Pie Chart of Fuel Type"
)
# 3D pie chart for Condition
# Count the cars for each condition
tab <- table(Condition = cars.df$Condition)
# Save the table as a data frame (columns: Condition, Freq)
tab.df <- as.data.frame(tab)
# Slice sizes: one count per condition
x <- tab.df$Freq
# Slice labels are taken from the table itself so they always line up with
# the counts, instead of being typed out by hand
labels <- names(tab)
# Plot the chart with the slices pulled apart slightly (explode = 0.1),
# titled after the variable being shown
pie3D(x,
  labels = labels, explode = 0.1,
  main = "Pie Chart of Condition"
)
# ggplot2 supplies ggplot() and geom_histogram()
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
# Histogram of the Price column, using the default binning
ggplot(cars.df, aes(x = Price)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# ggplot2 supplies ggplot() and geom_histogram()
library(ggplot2)
# Histogram of the KMs.Driven column, using the default binning
ggplot(cars.df, aes(x = KMs.Driven)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Horizontal box plot of the KMs.Driven column of cars.df
boxplot(cars.df$KMs.Driven,
  main = "boxplot for KMs Driven",
  xlab = "KMs Driven",
  col = "lightblue",
  horizontal = TRUE,
  width = 0.5
)
# Scatter plot of Year (x-axis) against Price (y-axis).
# No colour aesthetic is mapped, so the points use the default colour and
# no manual colour scale is attached.
ggplot(cars.df, aes(x = Year, y = Price)) +
  geom_point() +
  labs(x = "Year", y = "Price")
# Scatter plot of KMs.Driven (x-axis) against Price (y-axis).
# No colour aesthetic is mapped, so the points use the default colour and
# no manual colour scale is attached.
ggplot(cars.df, aes(x = KMs.Driven, y = Price)) +
  geom_point() +
  labs(x = "KM Driven", y = "Price")
# Keep only the three columns used in the correlation analysis
Subset.df <- cars.df[, c("KMs.Driven", "Price", "Year")]
# Pairwise correlation matrix of those columns (cor() defaults to Pearson)
corMat <- cor(Subset.df)
# Display the matrix rounded to 8 decimal places
round(corMat, 8)
## KMs.Driven Price Year
## KMs.Driven 1.00000000 -0.02268627 -0.1040009
## Price -0.02268627 1.00000000 0.1737995
## Year -0.10400086 0.17379951 1.0000000
# Visualizing correlations (correlogram)
library(corrplot)
## corrplot 0.84 loaded
# corMat already holds cor(Subset.df), so it is reused for both views
# instead of recomputing the correlations for each plot.
# Circles: size and colour encode each correlation
corrplot(corMat, method = "circle")
# Numbers: the correlation coefficients are printed in the cells
corrplot(corMat, method = "number")