# Load the car advertisement data set and take a first look at it.
#
# The project directory is only switched to when it exists on this machine;
# elsewhere the CSV is looked up in the current working directory instead of
# failing on a user-specific absolute path.
project_dir <- "C:/Users/Jaya/Desktop/intership/project"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# stringsAsFactors = TRUE keeps the text columns (car, body, engType,
# registration, model, drive) as factors on R >= 4.0 as well, where the
# read.csv() default became FALSE. The summaries pasted below show these
# columns as factors.
carad.df <- read.csv("car_ad.csv", stringsAsFactors = TRUE)

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(carad.df)
}

# Number of rows and columns.
dim(carad.df)
## [1] 9576 10
## -> Length (number of columns): 10, Breadth (number of rows): 9576
## car price body mileage
## Volkswagen : 936 Min. : 0 crossover:2069 Min. : 0.0
## Mercedes-Benz: 921 1st Qu.: 4999 hatch :1252 1st Qu.: 70.0
## BMW : 694 Median : 9200 other : 838 Median :128.0
## Toyota : 541 Mean : 15633 sedan :3646 Mean :138.9
## VAZ : 489 3rd Qu.: 16700 vagon : 722 3rd Qu.:194.0
## Renault : 469 Max. :547800 van :1049 Max. :999.0
## (Other) :5526
## engV engType registration year
## Min. : 0.100 Diesel:3013 no : 561 Min. :1953
## 1st Qu.: 1.600 Gas :1722 yes:9015 1st Qu.:2004
## Median : 2.000 Other : 462 Median :2008
## Mean : 2.646 Petrol:4379 Mean :2007
## 3rd Qu.: 2.500 3rd Qu.:2012
## Max. :99.990 Max. :2016
## NA's :434
## model drive
## E-Class : 199 : 511
## A6 : 172 front:5188
## Camry : 134 full :2500
## Vito ïà ññ.: 131 rear :1377
## Lanos : 127
## X5 : 119
## (Other) :8694
## vars n mean sd median trimmed mad min
## car* 1 9576 51.09 25.63 53.0 52.65 34.10 1.0
## price 2 9576 15633.32 24106.52 9200.0 10981.42 7709.52 0.0
## body* 3 9576 3.30 1.60 4.0 3.25 1.48 1.0
## mileage 4 9576 138.86 98.63 128.0 131.20 91.92 0.0
## engV 5 9142 2.65 5.93 2.0 2.10 0.74 0.1
## engType* 6 9576 2.65 1.33 3.0 2.69 1.48 1.0
## registration* 7 9576 1.94 0.23 2.0 2.00 0.00 1.0
## year 8 9576 2006.61 7.07 2008.0 2007.52 5.93 1953.0
## model* 9 9576 455.45 255.42 486.5 456.62 328.40 1.0
## drive* 10 9576 2.50 0.80 2.0 2.44 0.00 1.0
## max range skew kurtosis se
## car* 87.00 86.00 -0.42 -1.04 0.26
## price 547800.00 547800.00 7.13 93.67 246.34
## body* 6.00 5.00 -0.06 -1.05 0.02
## mileage 999.00 999.00 1.30 5.15 1.01
## engV 99.99 99.89 15.18 239.61 0.06
## engType* 4.00 3.00 -0.14 -1.76 0.01
## registration* 2.00 1.00 -3.76 12.13 0.00
## year 2016.00 63.00 -1.55 3.87 0.07
## model* 888.00 887.00 -0.03 -1.18 2.61
## drive* 4.00 3.00 0.54 -0.46 0.01
# One-way frequency table of engine type.
mytable <- table(engType = carad.df$engType)
100 * prop.table(mytable) # share of each engine type, in percent
## engType
## Diesel Gas Other Petrol
## 31.464077 17.982456 4.824561 45.728906
mytable # raw counts
## engType
## Diesel Gas Other Petrol
## 3013 1722 462 4379
# One-way frequency table of body type.
mytable <- table(body = carad.df$body)
100 * prop.table(mytable) # share of each body type, in percent
## body
## crossover hatch other sedan vagon van
## 21.606099 13.074353 8.751044 38.074353 7.539683 10.954470
mytable # raw counts
## body
## crossover hatch other sedan vagon van
## 2069 1252 838 3646 722 1049
# One-way frequency table of registration status.
mytable <- table(registration = carad.df$registration)
100 * prop.table(mytable) # share of each registration status, in percent
## registration
## no yes
## 5.858396 94.141604
mytable # raw counts
## registration
## no yes
## 561 9015
# One-way frequency table of drive type (the first level is an empty label).
mytable <- table(drive = carad.df$drive)
100 * prop.table(mytable) # share of each drive type, in percent
## drive
## front full rear
## 5.336257 54.177109 26.106934 14.379699
mytable # raw counts
## drive
## front full rear
## 511 5188 2500 1377
# Two-way contingency table: car (rows) by body (columns).
# The name `mytable1` is kept because the tests further down refer to it.
mytable1 <- xtabs(~ car + body, data = carad.df)

# Fix: the margins used to be computed on `mytable` (the one-way drive table
# left over from the previous section) instead of `mytable1`, so the output
# pasted here showed drive counts. Re-run to regenerate the output.
addmargins(mytable1) # counts with row/column totals
addmargins(prop.table(mytable1) * 100) # percent of all rows, with totals
# Two-way contingency table: car (rows) by engType (columns).
# The name `mytable2` is kept because the tests further down refer to it.
mytable2 <- xtabs(~ car + engType, data = carad.df)

# Fix: the margins used to be computed on `mytable` (the one-way drive table
# left over from an earlier section) instead of `mytable2`, so the output
# pasted here showed drive counts. Re-run to regenerate the output.
addmargins(mytable2) # counts with row/column totals
addmargins(prop.table(mytable2) * 100) # percent of all rows, with totals
# Two-way contingency table: car (rows) by registration (columns).
# The name `mytable3` is kept because the tests further down refer to it.
mytable3 <- xtabs(~ car + registration, data = carad.df)

# Fix: the margins used to be computed on `mytable` (the one-way drive table
# left over from an earlier section) instead of `mytable3`, so the output
# pasted here showed drive counts. Re-run to regenerate the output.
addmargins(mytable3) # counts with row/column totals
addmargins(prop.table(mytable3) * 100) # percent of all rows, with totals
# Two-way contingency table: car (rows) by drive (columns).
# The name `mytable4` is kept because the tests further down refer to it.
mytable4 <- xtabs(~ car + drive, data = carad.df)

# Fix: the margins used to be computed on `mytable` (the one-way drive table
# left over from an earlier section) instead of `mytable4`, so the output
# pasted here showed one-way drive counts. Re-run to regenerate the output.
addmargins(mytable4) # counts with row/column totals
addmargins(prop.table(mytable4) * 100) # percent of all rows, with totals
# Two-way contingency table: body (rows) by engType (columns).
# The name `mytable5` is kept because the tests further down refer to it.
mytable5 <- xtabs(~ body + engType, data = carad.df)

# Fix: the margins used to be computed on `mytable` (the one-way drive table
# left over from an earlier section) instead of `mytable5`, so the output
# pasted here showed drive counts. Re-run to regenerate the output.
addmargins(mytable5) # counts with row/column totals
addmargins(prop.table(mytable5) * 100) # percent of all rows, with totals
# Two-way contingency table: body (rows) by registration (columns).
# The name `mytable6` is kept because the tests further down refer to it.
mytable6 <- xtabs(~ body + registration, data = carad.df)

# Fix: the margins used to be computed on `mytable` (the one-way drive table
# left over from an earlier section) instead of `mytable6`, so the output
# pasted here showed drive counts. Re-run to regenerate the output.
addmargins(mytable6) # counts with row/column totals
addmargins(prop.table(mytable6) * 100) # percent of all rows, with totals
# Two-way contingency table: body (rows) by drive (columns).
# The name `mytable7` is kept because the tests further down refer to it.
mytable7 <- xtabs(~ body + drive, data = carad.df)

# Fix: the margins used to be computed on `mytable` (the one-way drive table
# left over from an earlier section) instead of `mytable7`, so the output
# pasted here showed one-way drive counts. Re-run to regenerate the output.
addmargins(mytable7) # counts with row/column totals
addmargins(prop.table(mytable7) * 100) # percent of all rows, with totals
# Two-way contingency table: engType (rows) by registration (columns).
# The name `mytable8` is kept because the tests further down refer to it.
mytable8 <- xtabs(~ engType + registration, data = carad.df)

# Fix: the margins used to be computed on `mytable` (the one-way drive table
# left over from an earlier section) instead of `mytable8`, so the output
# pasted here showed drive counts. Re-run to regenerate the output.
addmargins(mytable8) # counts with row/column totals
addmargins(prop.table(mytable8) * 100) # percent of all rows, with totals
# Two-way contingency table: engType (rows) by drive (columns).
# The name `mytable9` is kept because the tests further down refer to it.
mytable9 <- xtabs(~ engType + drive, data = carad.df)

# Fix: the margins used to be computed on `mytable` (the one-way drive table
# left over from an earlier section) instead of `mytable9`, so the output
# pasted here showed one-way drive counts. Re-run to regenerate the output.
addmargins(mytable9) # counts with row/column totals
addmargins(prop.table(mytable9) * 100) # percent of all rows, with totals
# Two-way contingency table: registration (rows) by drive (columns).
# The name `mytable10` is kept because the tests further down refer to it.
mytable10 <- xtabs(~ registration + drive, data = carad.df)

# Fix: the margins used to be computed on `mytable` (the one-way drive table
# left over from an earlier section) instead of `mytable10`, so the output
# pasted here showed one-way drive counts. Re-run to regenerate the output.
addmargins(mytable10) # counts with row/column totals
addmargins(prop.table(mytable10) * 100) # percent of all rows, with totals
## a) Boxplot of the price column.
boxplot(carad.df[["price"]])
## b) Boxplot for mileage of car.
boxplot(carad.df[["mileage"]])
## c) Boxplot of advertisement cost of car (the same price column as in a).
boxplot(carad.df[["price"]])
# Lattice histograms of one variable, conditioned on another (one panel per
# level of the variable after `|`). lattice only needs to be attached once.
library(lattice)

## a) For car and body
histogram(~ car | body, data = carad.df)
## b) For engine type and body
histogram(~ engType | body, data = carad.df)
## c) For car and drive
histogram(~ car | drive, data = carad.df)
# The plotting calls below are base graphics; lattice is attached once here
# rather than before every call.
library(lattice)

# a) For price: price (x) against car (y).
# The argument expressions are kept as written because the default axis
# labels are derived from them.
plot(x = carad.df$price, y = carad.df$car)
# b) For mileage: one bar per row of the data set.
barplot(carad.df[["mileage"]])
# c) For engine volume (engV): one bar per row of the data set.
barplot(carad.df[["engV"]])
# Correlogram of the data frame (shaded cells below the diagonal, pies above
# it), followed by a plot of the first five columns against each other.
library(corrgram)
corrgram(
  carad.df,
  order = FALSE,
  upper.panel = panel.pie,
  lower.panel = panel.shade,
  text.panel = panel.txt,
  main = "Corrgram of Variables"
)
plot(carad.df[, 1:5])
# Pearson chi-squared tests on the two-way tables mytable1 .. mytable10 that
# were built above with xtabs(). Lines starting with `##` are output pasted
# from an earlier run.
# NOTE(review): in that run the tests on mytable1 .. mytable4 (the tables that
# involve `car`) warned "Chi-squared approximation may be incorrect" --
# presumably because of sparsely populated cells; confirm whether the
# approximation is acceptable for those tables.

# car x body
chisq.test(mytable1)
## Warning in chisq.test(mytable1): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable1
## X-squared = 6544.5, df = 430, p-value < 2.2e-16
# car x engType
chisq.test(mytable2)
## Warning in chisq.test(mytable2): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable2
## X-squared = 3511.8, df = 258, p-value < 2.2e-16
# car x registration
chisq.test(mytable3)
## Warning in chisq.test(mytable3): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable3
## X-squared = 605.8, df = 86, p-value < 2.2e-16
# car x drive
chisq.test(mytable4)
## Warning in chisq.test(mytable4): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable4
## X-squared = 7250.9, df = 258, p-value < 2.2e-16
# body x engType
chisq.test(mytable5)
##
## Pearson's Chi-squared test
##
## data: mytable5
## X-squared = 2533, df = 15, p-value < 2.2e-16
# body x registration
chisq.test(mytable6)
##
## Pearson's Chi-squared test
##
## data: mytable6
## X-squared = 283.93, df = 5, p-value < 2.2e-16
# body x drive
chisq.test(mytable7)
##
## Pearson's Chi-squared test
##
## data: mytable7
## X-squared = 6291.6, df = 15, p-value < 2.2e-16
# engType x registration
chisq.test(mytable8)
##
## Pearson's Chi-squared test
##
## data: mytable8
## X-squared = 307.3, df = 3, p-value < 2.2e-16
# engType x drive
chisq.test(mytable9)
##
## Pearson's Chi-squared test
##
## data: mytable9
## X-squared = 442.43, df = 9, p-value < 2.2e-16
# registration x drive
chisq.test(mytable10)
##
## Pearson's Chi-squared test
##
## data: mytable10
## X-squared = 92.759, df = 3, p-value < 2.2e-16
# One-sample t-tests run directly on the contingency tables mytable1 ..
# mytable10. Per the pasted output (`##` lines), each call tests whether the
# mean of the table's cell counts differs from 0 (e.g. mytable1: df = 521,
# mean of x = 18.34).
# NOTE(review): a test on cell counts says nothing about the association
# between the two factors -- confirm whether a t-test on a numeric column
# (for example price compared between two groups) was intended instead.

# car x body
t.test(mytable1)
##
## One Sample t-test
##
## data: mytable1
## t = 8.8793, df = 521, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 14.28605 22.40360
## sample estimates:
## mean of x
## 18.34483
# car x engType
t.test(mytable2)
##
## One Sample t-test
##
## data: mytable2
## t = 7.6262, df = 347, p-value = 2.344e-13
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 20.42048 34.61400
## sample estimates:
## mean of x
## 27.51724
# car x registration
t.test(mytable3)
##
## One Sample t-test
##
## data: mytable3
## t = 5.2714, df = 173, p-value = 3.996e-07
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 34.42808 75.64088
## sample estimates:
## mean of x
## 55.03448
# car x drive
t.test(mytable4)
##
## One Sample t-test
##
## data: mytable4
## t = 6.539, df = 347, p-value = 2.218e-10
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 19.24047 35.79401
## sample estimates:
## mean of x
## 27.51724
# body x engType
t.test(mytable5)
##
## One Sample t-test
##
## data: mytable5
## t = 4.2046, df = 23, p-value = 0.0003381
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 202.6946 595.3054
## sample estimates:
## mean of x
## 399
# body x registration
t.test(mytable6)
##
## One Sample t-test
##
## data: mytable6
## t = 2.7029, df = 11, p-value = 0.02055
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 148.193 1447.807
## sample estimates:
## mean of x
## 798
# body x drive
t.test(mytable7)
##
## One Sample t-test
##
## data: mytable7
## t = 3.2922, df = 23, p-value = 0.003189
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 148.2914 649.7086
## sample estimates:
## mean of x
## 399
# engType x registration
t.test(mytable8)
##
## One Sample t-test
##
## data: mytable8
## t = 2.1828, df = 7, p-value = 0.06537
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -99.69983 2493.69983
## sample estimates:
## mean of x
## 1197
# engType x drive
t.test(mytable9)
##
## One Sample t-test
##
## data: mytable9
## t = 3.596, df = 15, p-value = 0.002647
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 243.7563 953.2437
## sample estimates:
## mean of x
## 598.5
# registration x drive
t.test(mytable10)
##
## One Sample t-test
##
## data: mytable10
## t = 1.9925, df = 7, p-value = 0.08658
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -223.5856 2617.5856
## sample estimates:
## mean of x
## 1197