1. Read your dataset in R and visualize the length and breadth of your dataset.

 setwd("C:/Users/Jaya/Desktop/intership/project")
# NOTE(review): the absolute path above is machine-specific; adjust it (or run
# from the project directory) when executing on another machine.
# Read the car advertisement data. stringsAsFactors = TRUE keeps the text
# columns as factors on R >= 4.0 as well (it was the default before 4.0),
# which the factor summaries and lattice histograms below rely on.
carad.df <- read.csv("car_ad.csv", stringsAsFactors = TRUE)
View(carad.df) # open the data viewer
dim(carad.df) # number of rows, then number of columns
## [1] 9576   10

-> Length - 10 (columns), Breadth - 9576 (rows)

2. Create descriptive statistics (min, max, median, etc.) for each variable.

##             car           price               body         mileage     
##  Volkswagen   : 936   Min.   :     0   crossover:2069   Min.   :  0.0  
##  Mercedes-Benz: 921   1st Qu.:  4999   hatch    :1252   1st Qu.: 70.0  
##  BMW          : 694   Median :  9200   other    : 838   Median :128.0  
##  Toyota       : 541   Mean   : 15633   sedan    :3646   Mean   :138.9  
##  VAZ          : 489   3rd Qu.: 16700   vagon    : 722   3rd Qu.:194.0  
##  Renault      : 469   Max.   :547800   van      :1049   Max.   :999.0  
##  (Other)      :5526                                                    
##       engV          engType     registration      year     
##  Min.   : 0.100   Diesel:3013   no : 561     Min.   :1953  
##  1st Qu.: 1.600   Gas   :1722   yes:9015     1st Qu.:2004  
##  Median : 2.000   Other : 462                Median :2008  
##  Mean   : 2.646   Petrol:4379                Mean   :2007  
##  3rd Qu.: 2.500                              3rd Qu.:2012  
##  Max.   :99.990                              Max.   :2016  
##  NA's   :434                                               
##         model        drive     
##  E-Class   : 199        : 511  
##  A6        : 172   front:5188  
##  Camry     : 134   full :2500  
##  Vito ïàññ.: 131   rear :1377  
##  Lanos     : 127               
##  X5        : 119               
##  (Other)   :8694
##               vars    n     mean       sd median  trimmed     mad    min
## car*             1 9576    51.09    25.63   53.0    52.65   34.10    1.0
## price            2 9576 15633.32 24106.52 9200.0 10981.42 7709.52    0.0
## body*            3 9576     3.30     1.60    4.0     3.25    1.48    1.0
## mileage          4 9576   138.86    98.63  128.0   131.20   91.92    0.0
## engV             5 9142     2.65     5.93    2.0     2.10    0.74    0.1
## engType*         6 9576     2.65     1.33    3.0     2.69    1.48    1.0
## registration*    7 9576     1.94     0.23    2.0     2.00    0.00    1.0
## year             8 9576  2006.61     7.07 2008.0  2007.52    5.93 1953.0
## model*           9 9576   455.45   255.42  486.5   456.62  328.40    1.0
## drive*          10 9576     2.50     0.80    2.0     2.44    0.00    1.0
##                     max     range  skew kurtosis     se
## car*              87.00     86.00 -0.42    -1.04   0.26
## price         547800.00 547800.00  7.13    93.67 246.34
## body*              6.00      5.00 -0.06    -1.05   0.02
## mileage          999.00    999.00  1.30     5.15   1.01
## engV              99.99     99.89 15.18   239.61   0.06
## engType*           4.00      3.00 -0.14    -1.76   0.01
## registration*      2.00      1.00 -3.76    12.13   0.00
## year            2016.00     63.00 -1.55     3.87   0.07
## model*           888.00    887.00 -0.03    -1.18   2.61
## drive*             4.00      3.00  0.54    -0.46   0.01

3. Create one-way contingency tables for the categorical variables in your dataset.

a) engine type

# One-way frequency table of engType.
mytable <- table(engType = carad.df$engType)
100 * prop.table(mytable) # share of each level, in percent
## engType
##    Diesel       Gas     Other    Petrol 
## 31.464077 17.982456  4.824561 45.728906
print(mytable) # raw counts
## engType
## Diesel    Gas  Other Petrol 
##   3013   1722    462   4379

b) Body type of car

# One-way frequency table of body.
mytable <- table(body = carad.df$body)
100 * prop.table(mytable) # share of each level, in percent
## body
## crossover     hatch     other     sedan     vagon       van 
## 21.606099 13.074353  8.751044 38.074353  7.539683 10.954470
print(mytable) # raw counts
## body
## crossover     hatch     other     sedan     vagon       van 
##      2069      1252       838      3646       722      1049

c) Registration

# One-way frequency table of registration.
mytable <- table(registration = carad.df$registration)
100 * prop.table(mytable) # share of each level, in percent
## registration
##        no       yes 
##  5.858396 94.141604
print(mytable) # raw counts
## registration
##   no  yes 
##  561 9015

d) Drive

# One-way frequency table of drive (the first level is an empty string).
mytable <- table(drive = carad.df$drive)
100 * prop.table(mytable) # share of each level, in percent
## drive
##               front      full      rear 
##  5.336257 54.177109 26.106934 14.379699
print(mytable) # raw counts
## drive
##       front  full  rear 
##   511  5188  2500  1377

4. Create two-way contingency tables for the categorical variables in your dataset, along with percentages.

a) Car and body type

# Two-way contingency table: car (rows) by body (columns).
mytable1 <- xtabs(~ car + body, data = carad.df)
# Margins are taken on mytable1; the one-way `mytable` left over from the
# drive tabulation would only reprint the drive counts.
addmargins(mytable1) # counts with row/column totals
addmargins(prop.table(mytable1) * 100) # cell percentages of the grand total
## NOTE: the output previously recorded here was the one-way drive table, not
## this cross-tabulation; re-run these lines to regenerate it.

b) Car and Engine type

# Two-way contingency table: car (rows) by engType (columns).
mytable2 <- xtabs(~ car + engType, data = carad.df)
# Margins are taken on mytable2; the one-way `mytable` left over from the
# drive tabulation would only reprint the drive counts.
addmargins(mytable2) # counts with row/column totals
addmargins(prop.table(mytable2) * 100) # cell percentages of the grand total
## NOTE: the output previously recorded here was the one-way drive table, not
## this cross-tabulation; re-run these lines to regenerate it.

c) Car and Registration

# Two-way contingency table: car (rows) by registration (columns).
mytable3 <- xtabs(~ car + registration, data = carad.df)
# Margins are taken on mytable3; the one-way `mytable` left over from the
# drive tabulation would only reprint the drive counts.
addmargins(mytable3) # counts with row/column totals
addmargins(prop.table(mytable3) * 100) # cell percentages of the grand total
## NOTE: the output previously recorded here was the one-way drive table, not
## this cross-tabulation; re-run these lines to regenerate it.

d) Car and drive type

# Two-way contingency table: car (rows) by drive (columns).
mytable4 <- xtabs(~ car + drive, data = carad.df)
# Margins are taken on mytable4; the one-way `mytable` left over from the
# drive tabulation would only reprint the drive counts.
addmargins(mytable4) # counts with row/column totals
addmargins(prop.table(mytable4) * 100) # cell percentages of the grand total
## NOTE: the output previously recorded here was the one-way drive table, not
## this cross-tabulation; re-run these lines to regenerate it.

e) Body and engine type

# Two-way contingency table: body (rows) by engType (columns).
mytable5 <- xtabs(~ body + engType, data = carad.df)
# Margins are taken on mytable5; the one-way `mytable` left over from the
# drive tabulation would only reprint the drive counts.
addmargins(mytable5) # counts with row/column totals
addmargins(prop.table(mytable5) * 100) # cell percentages of the grand total
## NOTE: the output previously recorded here was the one-way drive table, not
## this cross-tabulation; re-run these lines to regenerate it.

f) Body and Registration

# Two-way contingency table: body (rows) by registration (columns).
mytable6 <- xtabs(~ body + registration, data = carad.df)
# Margins are taken on mytable6; the one-way `mytable` left over from the
# drive tabulation would only reprint the drive counts.
addmargins(mytable6) # counts with row/column totals
addmargins(prop.table(mytable6) * 100) # cell percentages of the grand total
## NOTE: the output previously recorded here was the one-way drive table, not
## this cross-tabulation; re-run these lines to regenerate it.

g) body and drive type

# Two-way contingency table: body (rows) by drive (columns).
mytable7 <- xtabs(~ body + drive, data = carad.df)
# Margins are taken on mytable7; the one-way `mytable` left over from the
# drive tabulation would only reprint the drive counts.
addmargins(mytable7) # counts with row/column totals
addmargins(prop.table(mytable7) * 100) # cell percentages of the grand total
## NOTE: the output previously recorded here was the one-way drive table, not
## this cross-tabulation; re-run these lines to regenerate it.

h) Engine and Registration

# Two-way contingency table: engType (rows) by registration (columns).
mytable8 <- xtabs(~ engType + registration, data = carad.df)
# Margins are taken on mytable8; the one-way `mytable` left over from the
# drive tabulation would only reprint the drive counts.
addmargins(mytable8) # counts with row/column totals
addmargins(prop.table(mytable8) * 100) # cell percentages of the grand total
## NOTE: the output previously recorded here was the one-way drive table, not
## this cross-tabulation; re-run these lines to regenerate it.

i) Engine and Drive type

# Two-way contingency table: engType (rows) by drive (columns).
mytable9 <- xtabs(~ engType + drive, data = carad.df)
# Margins are taken on mytable9; the one-way `mytable` left over from the
# drive tabulation would only reprint the drive counts.
addmargins(mytable9) # counts with row/column totals
addmargins(prop.table(mytable9) * 100) # cell percentages of the grand total
## NOTE: the output previously recorded here was the one-way drive table, not
## this cross-tabulation; re-run these lines to regenerate it.

j) Registration and Drive type

# Two-way contingency table: registration (rows) by drive (columns).
mytable10 <- xtabs(~ registration + drive, data = carad.df)
# Margins are taken on mytable10; the one-way `mytable` left over from the
# drive tabulation would only reprint the drive counts.
addmargins(mytable10) # counts with row/column totals
addmargins(prop.table(mytable10) * 100) # cell percentages of the grand total
## NOTE: the output previously recorded here was the one-way drive table, not
## this cross-tabulation; re-run these lines to regenerate it.

5. Draw a boxplot of the variables that belong to your study.

a) Boxplot of advertisement cost of car.

boxplot(carad.df[["price"]])

## b) Boxplot of car mileage.

boxplot(carad.df[["mileage"]])

## c) Boxplot of advertisement cost of car.
## NOTE(review): this repeats plot (a); presumably another variable was
## intended here -- confirm.

boxplot(carad.df[["price"]])

6. Draw Histograms for your suitable data fields.

a) For car and body

# lattice provides histogram(); attaching it once covers all three plots.
library(lattice)

# Histogram of car, one panel per level of body.
histogram(~ car | body, data = carad.df)

## b) For engine type and body

# Histogram of engType, one panel per level of body.
histogram(~ engType | body, data = carad.df)

## c) For car and drive

# Histogram of car, one panel per level of drive.
histogram(~ car | drive, data = carad.df)

7. Draw suitable plot for your data fields.

a) For price of advertising cars.

library(lattice)
# Base-graphics scatter of price (x axis) against car (y axis).
plot(y = carad.df$car, x = carad.df$price)

# b) For mileage

# One bar per row of the mileage column.
barplot(height = carad.df$mileage)

# c) For cars (plots the engV column)

# One bar per row of the engV column.
barplot(height = carad.df$engV)

8. Sorry, sir! I tried a lot, but there was an error when displaying the correlation matrix.

9. Create a correlation matrix.

library(corrgram)
# Corrgram of carad.df with the columns left in their original order:
# panel.shade in the lower triangle, panel.pie in the upper triangle and
# panel.txt for the text panels.
corrgram(
  carad.df,
  order = FALSE,
  main = "Corrgram of Variables",
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt
)

10. Create a scatter plot matrix for your data set.

# Scatter plot matrix of the first five columns of carad.df.
plot(carad.df[, 1:5])

11. Chi square test

a) Car and body type

  # Pearson chi-squared test on the car-by-body table (mytable1).
  # R warns here that the chi-squared approximation may be incorrect.
  chisq.test(mytable1)
## Warning in chisq.test(mytable1): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable1
## X-squared = 6544.5, df = 430, p-value < 2.2e-16

b) Car and Engine type

  # Pearson chi-squared test on the car-by-engType table (mytable2).
  # R warns here that the chi-squared approximation may be incorrect.
  chisq.test(mytable2)
## Warning in chisq.test(mytable2): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable2
## X-squared = 3511.8, df = 258, p-value < 2.2e-16

c) Car and Registration

  # Pearson chi-squared test on the car-by-registration table (mytable3).
  # R warns here that the chi-squared approximation may be incorrect.
  chisq.test(mytable3)
## Warning in chisq.test(mytable3): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable3
## X-squared = 605.8, df = 86, p-value < 2.2e-16

d) Car and drive type

  # Pearson chi-squared test on the car-by-drive table (mytable4).
  # R warns here that the chi-squared approximation may be incorrect.
  chisq.test(mytable4)
## Warning in chisq.test(mytable4): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable4
## X-squared = 7250.9, df = 258, p-value < 2.2e-16

e) Body and engine type

  # Pearson chi-squared test on the body-by-engType table (mytable5).
  chisq.test(mytable5)
## 
##  Pearson's Chi-squared test
## 
## data:  mytable5
## X-squared = 2533, df = 15, p-value < 2.2e-16

f) Body and Registration

  # Pearson chi-squared test on the body-by-registration table (mytable6).
  chisq.test(mytable6)
## 
##  Pearson's Chi-squared test
## 
## data:  mytable6
## X-squared = 283.93, df = 5, p-value < 2.2e-16

g) body and drive type

  # Pearson chi-squared test on the body-by-drive table (mytable7).
  chisq.test(mytable7)
## 
##  Pearson's Chi-squared test
## 
## data:  mytable7
## X-squared = 6291.6, df = 15, p-value < 2.2e-16

h) Engine and Registration

  # Pearson chi-squared test on the engType-by-registration table (mytable8).
  chisq.test(mytable8)
## 
##  Pearson's Chi-squared test
## 
## data:  mytable8
## X-squared = 307.3, df = 3, p-value < 2.2e-16

i) Engine and Drive type

  # Pearson chi-squared test on the engType-by-drive table (mytable9).
  chisq.test(mytable9)
## 
##  Pearson's Chi-squared test
## 
## data:  mytable9
## X-squared = 442.43, df = 9, p-value < 2.2e-16

j) Registration and Drive type

  # Pearson chi-squared test on the registration-by-drive table (mytable10).
  chisq.test(mytable10)
## 
##  Pearson's Chi-squared test
## 
## data:  mytable10
## X-squared = 92.759, df = 3, p-value < 2.2e-16

12. t-test

a) Car and body type

  # NOTE(review): this is a one-sample t-test on the cell counts of mytable1
  # (H0: mean count = 0); it does not compare car against body -- confirm.
  t.test(mytable1)
## 
##  One Sample t-test
## 
## data:  mytable1
## t = 8.8793, df = 521, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  14.28605 22.40360
## sample estimates:
## mean of x 
##  18.34483

b) Car and Engine type

  # NOTE(review): this is a one-sample t-test on the cell counts of mytable2
  # (H0: mean count = 0); it does not compare car against engType -- confirm.
  t.test(mytable2)
## 
##  One Sample t-test
## 
## data:  mytable2
## t = 7.6262, df = 347, p-value = 2.344e-13
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  20.42048 34.61400
## sample estimates:
## mean of x 
##  27.51724

c) Car and Registration

  # NOTE(review): this is a one-sample t-test on the cell counts of mytable3
  # (H0: mean count = 0); it does not compare car against registration --
  # confirm.
  t.test(mytable3)
## 
##  One Sample t-test
## 
## data:  mytable3
## t = 5.2714, df = 173, p-value = 3.996e-07
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  34.42808 75.64088
## sample estimates:
## mean of x 
##  55.03448

d) Car and drive type

  # NOTE(review): this is a one-sample t-test on the cell counts of mytable4
  # (H0: mean count = 0); it does not compare car against drive -- confirm.
  t.test(mytable4)
## 
##  One Sample t-test
## 
## data:  mytable4
## t = 6.539, df = 347, p-value = 2.218e-10
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  19.24047 35.79401
## sample estimates:
## mean of x 
##  27.51724

e) Body and engine type

  # NOTE(review): this is a one-sample t-test on the cell counts of mytable5
  # (H0: mean count = 0); it does not compare body against engType -- confirm.
  t.test(mytable5)
## 
##  One Sample t-test
## 
## data:  mytable5
## t = 4.2046, df = 23, p-value = 0.0003381
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  202.6946 595.3054
## sample estimates:
## mean of x 
##       399

f) Body and Registration

  # NOTE(review): this is a one-sample t-test on the cell counts of mytable6
  # (H0: mean count = 0); it does not compare body against registration --
  # confirm.
  t.test(mytable6)
## 
##  One Sample t-test
## 
## data:  mytable6
## t = 2.7029, df = 11, p-value = 0.02055
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##   148.193 1447.807
## sample estimates:
## mean of x 
##       798

g) body and drive type

  # NOTE(review): this is a one-sample t-test on the cell counts of mytable7
  # (H0: mean count = 0); it does not compare body against drive -- confirm.
  t.test(mytable7)
## 
##  One Sample t-test
## 
## data:  mytable7
## t = 3.2922, df = 23, p-value = 0.003189
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  148.2914 649.7086
## sample estimates:
## mean of x 
##       399

h) Engine and Registration

  # NOTE(review): this is a one-sample t-test on the cell counts of mytable8
  # (H0: mean count = 0); it does not compare engType against registration --
  # confirm.
  t.test(mytable8)
## 
##  One Sample t-test
## 
## data:  mytable8
## t = 2.1828, df = 7, p-value = 0.06537
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##   -99.69983 2493.69983
## sample estimates:
## mean of x 
##      1197

i) Engine and Drive type

  # NOTE(review): this is a one-sample t-test on the cell counts of mytable9
  # (H0: mean count = 0); it does not compare engType against drive -- confirm.
  t.test(mytable9)
## 
##  One Sample t-test
## 
## data:  mytable9
## t = 3.596, df = 15, p-value = 0.002647
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  243.7563 953.2437
## sample estimates:
## mean of x 
##     598.5

j) Registration and Drive type

  # NOTE(review): this is a one-sample t-test on the cell counts of mytable10
  # (H0: mean count = 0); it does not compare registration against drive --
  # confirm.
  t.test(mytable10)
## 
##  One Sample t-test
## 
## data:  mytable10
## t = 1.9925, df = 7, p-value = 0.08658
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -223.5856 2617.5856
## sample estimates:
## mean of x 
##      1197