Part 1: Read the data

# Load the car listings from the CSV export into a data frame.
cars.df <- read.csv(file = "OLX_Car_Data_CSV (2).csv")
# Put the columns on the search path; later chunks refer to them by bare
# name (e.g. Condition, Fuel), so this call has to stay.
attach(what = cars.df)

Part 2: Column names

# List the column names of the data frame
names(cars.df)
##  [1] "Brand"            "Condition"        "Fuel"            
##  [4] "KMs.Driven"       "Model"            "Price"           
##  [7] "Registered.City"  "Transaction.Type" "Year"            
## [10] "Car.Segment"

Part 3: Data Dimensions

# Show the number of rows and columns, in that order
c(nrow(cars.df), ncol(cars.df))
## [1] 15312    10

Barplots

# Cross-tabulate condition (rows) against car segment (columns).
# with() looks the columns up inside cars.df, so this does not depend on
# the data frame being attached. The table is built once and reused.
counts <- with(cars.df, table(Condition, Car.Segment))
bar.cols <- c("white", "black")

# Grouped bar plot: one bar per condition, side by side within each segment.
# The y-axis shows the number of rows in each cell of the table, and the
# legend says which colour belongs to which condition.
barplot(counts,
        beside = TRUE,
        col = bar.cols,
        legend.text = rownames(counts),
        xlab = "Car Segment", ylab = "Count",
        main = "Grouped Bar Plot")

# Stacked bar plot of the same table: conditions stacked within each segment.
barplot(counts,
        col = bar.cols,
        legend.text = rownames(counts),
        xlab = "Car Segment", ylab = "Count",
        main = "Stacked Bar Plot")

# Count of cars by condition

# Percentage of rows in each condition, rounded to 2 decimal places
round(prop.table(table(Condition))*100,2)
## Condition
##   New  Used 
## 19.85 80.15
# Bar chart of the condition counts (relies on Condition being a factor,
# for which plot() draws one bar per level). The x-axis shows conditions.
plot(cars.df$Condition, xlab = "Condition", ylab = "Count")

# Count of cars by fuel type

# Percentage of rows per fuel type, to 2 decimal places
round(100 * prop.table(table(Fuel)), digits = 2)
## Fuel
##    CNG Diesel Hybrid    LPG Petrol 
##  23.79   1.32   3.58   0.04  71.28
# Bar chart of the fuel-type counts
plot(x = cars.df[["Fuel"]], ylab = "Count", xlab = "Fuel")

# Count of cars by transaction type

# Percentage of rows per transaction type, to 2 decimal places
round(100 * prop.table(table(Transaction.Type)), digits = 2)
## Transaction.Type
##                Cash Installment/Leasing 
##               94.18                5.82
# Bar chart of the transaction-type counts
plot(x = cars.df[["Transaction.Type"]], ylab = "Count",
     xlab = "Transaction Type")

Pie chart for fuel type

# Number of rows per fuel type
tab <- table(Fuel)
# Same counts as a data frame; the Freq column holds the counts
tab.df <- as.data.frame(tab)
# Slice sizes for the pie
x <- tab.df$Freq
# Slice labels come from the table itself, so they always match the counts
# in order and spelling (previously a hand-typed vector).
labels <- names(tab)
# Draw the pie chart; the title names what is actually plotted.
pie(x, labels, main = "Pie Chart of Fuel Type")

# 3D pie chart for fuel type

# Number of rows per fuel type
tab <- table(Fuel)
# Same counts as a data frame; the Freq column holds the counts
tab.df <- as.data.frame(tab)
# Slice sizes for the pie
x <- tab.df$Freq
# Slice labels come from the table itself, so they always match the counts
labels <- names(tab)
# plotrix provides pie3D()
library(plotrix)
## Warning: package 'plotrix' was built under R version 3.5.3
# Draw the exploded 3D pie; the title names what is actually plotted.
pie3D(x, labels = labels, explode = 0.1,
      main = "3D Pie Chart of Fuel Type")

# 3D pie chart for Condition

# Number of rows per condition
tab <- table(Condition)
# Same counts as a data frame; the Freq column holds the counts
tab.df <- as.data.frame(tab)
# Slice sizes for the pie
x <- tab.df$Freq
# Slice labels come from the table itself, so they always match the counts
labels <- names(tab)
# Draw the exploded 3D pie (pie3D comes from plotrix, loaded earlier);
# the title names what is actually plotted.
pie3D(x, labels = labels, explode = 0.1,
      main = "3D Pie Chart of Condition")

Histogram for Price

# ggplot2 supplies ggplot() and geom_histogram()
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
# Histogram of the Price column, using the default binning
ggplot(cars.df, aes(x = Price)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Histogram for Kms driven

# ggplot2 supplies ggplot() and geom_histogram()
library(ggplot2)
# Histogram of the KMs.Driven column, using the default binning
ggplot(cars.df, aes(x = KMs.Driven)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Box plot of the KMs.Driven column of cars.df

# Single horizontal box, filled light blue
boxplot(
  cars.df[["KMs.Driven"]],
  main = "boxplot for KMs Driven",
  xlab = "KMs Driven",
  col = "lightblue",
  horizontal = TRUE,
  width = 0.5
)

# Scatter plot of Year vs Price

# One point per row. No colour aesthetic is mapped, so the former
# scale_color_manual() call had nothing to act on and has been removed.
ggplot(data = cars.df, mapping = aes(x = Year, y = Price)) +
  geom_point() +
  xlab("Year") +
  ylab("Price")

# Scatter plot of KMs Driven vs Price

# One point per row. No colour aesthetic is mapped, so the former
# scale_color_manual() call had nothing to act on and has been removed.
ggplot(data = cars.df, mapping = aes(x = KMs.Driven, y = Price)) +
  geom_point() +
  xlab("KM Driven") +
  ylab("Price")

Correlation matrix

# Keep only the three numeric columns used for the correlation analysis
Subset.df <- cars.df[, c("KMs.Driven", "Price", "Year")]
# Pairwise correlations between those columns (cor() defaults)
corMat <- cor(Subset.df)
# Round to 8 decimal places for display
round(corMat, 8)
##             KMs.Driven       Price       Year
## KMs.Driven  1.00000000 -0.02268627 -0.1040009
## Price      -0.02268627  1.00000000  0.1737995
## Year       -0.10400086  0.17379951  1.0000000

# Visualizing correlations (e.g. a correlogram)

library(corrplot)
## corrplot 0.84 loaded
# corMat already holds cor(Subset.df), so it is reused for both plots
# instead of recomputing the matrix each time.
# Correlogram with circles representing each coefficient
corrplot(corMat, method = "circle")

# Same matrix shown as numeric coefficients
corrplot(corMat, method = "number")