Exploring numerical data

Load the plotting package and read the data

library(ggplot2)
# Read the car data set from the CSV file in the working directory.
cars <- read.csv(file = "cars.csv")

Data structure for ‘cars’

# Compact overview of every column and its type.
utils::str(cars)
## 'data.frame':    428 obs. of  19 variables:
##  $ name       : chr  "Chevrolet Aveo 4dr" "Chevrolet Aveo LS 4dr hatch" "Chevrolet Cavalier 2dr" "Chevrolet Cavalier 4dr" ...
##  $ sports_car : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ suv        : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ wagon      : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ minivan    : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ pickup     : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ all_wheel  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ rear_wheel : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ msrp       : int  11690 12585 14610 14810 16385 13670 15040 13270 13730 15460 ...
##  $ dealer_cost: int  10965 11802 13697 13884 15357 12849 14086 12482 12906 14496 ...
##  $ eng_size   : num  1.6 1.6 2.2 2.2 2.2 2 2 2 2 2 ...
##  $ ncyl       : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ horsepwr   : int  103 103 140 140 140 132 132 130 110 130 ...
##  $ city_mpg   : int  28 28 26 26 26 29 29 26 27 26 ...
##  $ hwy_mpg    : int  34 34 37 37 37 36 36 33 36 33 ...
##  $ weight     : int  2370 2348 2617 2676 2617 2581 2626 2612 2606 2606 ...
##  $ wheel_base : int  98 98 104 104 104 105 105 103 103 103 ...
##  $ length     : int  167 153 183 183 183 174 174 168 168 168 ...
##  $ width      : int  66 66 69 68 69 67 67 67 67 67 ...
# Show the first six rows.
head(cars, n = 6L)
##                          name sports_car   suv wagon minivan pickup all_wheel
## 1          Chevrolet Aveo 4dr      FALSE FALSE FALSE   FALSE  FALSE     FALSE
## 2 Chevrolet Aveo LS 4dr hatch      FALSE FALSE FALSE   FALSE  FALSE     FALSE
## 3      Chevrolet Cavalier 2dr      FALSE FALSE FALSE   FALSE  FALSE     FALSE
## 4      Chevrolet Cavalier 4dr      FALSE FALSE FALSE   FALSE  FALSE     FALSE
## 5   Chevrolet Cavalier LS 2dr      FALSE FALSE FALSE   FALSE  FALSE     FALSE
## 6           Dodge Neon SE 4dr      FALSE FALSE FALSE   FALSE  FALSE     FALSE
##   rear_wheel  msrp dealer_cost eng_size ncyl horsepwr city_mpg hwy_mpg weight
## 1      FALSE 11690       10965      1.6    4      103       28      34   2370
## 2      FALSE 12585       11802      1.6    4      103       28      34   2348
## 3      FALSE 14610       13697      2.2    4      140       26      37   2617
## 4      FALSE 14810       13884      2.2    4      140       26      37   2676
## 5      FALSE 16385       15357      2.2    4      140       26      37   2617
## 6      FALSE 13670       12849      2.0    4      132       29      36   2581
##   wheel_base length width
## 1         98    167    66
## 2         98    153    66
## 3        104    183    69
## 4        104    183    68
## 5        104    183    69
## 6        105    174    67
  1. Plot a histogram of city_mpg, with a separate panel for SUVs and non-SUVs
# Histogram of city_mpg in bins of width 5 (red bars, black outline),
# drawn as one panel per value of `suv`.
ggplot(cars, aes(city_mpg)) +
  geom_histogram(colour = "black", fill = "red", binwidth = 5) +
  facet_wrap(vars(suv))
## Warning: Removed 14 rows containing non-finite values (stat_bin).

  1. Keep only the cars with 4, 6, or 8 cylinders
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Subset of `cars` restricted to rows whose cylinder count is 4, 6 or 8.
cars_f <- filter(cars, ncyl %in% c(4, 6, 8))
  1. Create box plots of city mpg by ncyl
# One box of city_mpg per cylinder count; ncyl is converted to a factor so
# that it is treated as a discrete grouping variable on the x axis.
cars_f %>%
  ggplot(aes(x = as.factor(ncyl), y = city_mpg)) +
  geom_boxplot()
## Warning: Removed 11 rows containing non-finite values (stat_boxplot).

  1. Create overlaid density plots for the same data
# Density curves of city_mpg filled by cylinder count; alpha = 0.5 makes the
# fills semi-transparent so that overlapping curves remain visible.
cars_f %>%
  ggplot(aes(x = city_mpg, fill = as.factor(ncyl))) +
  geom_density(alpha = 0.5)
## Warning: Removed 11 rows containing non-finite values (stat_density).

Create a histogram of horsepwr using the default number of bins

# Histogram of horsepwr; no binwidth is supplied, so the default binning is
# used.
ggplot(data = cars_f, mapping = aes(x = horsepwr)) +
  geom_histogram(colour = "white", fill = "brown")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Create a histogram of horsepwr with a binwidth of 3

# Same histogram of horsepwr, now with an explicit bin width of 3.
ggplot(data = cars_f, mapping = aes(x = horsepwr)) +
  geom_histogram(colour = "grey", fill = "brown", binwidth = 3)

Construct a box plot of msrp

 # Single box plot of msrp over all rows of cars_f.
 ggplot(data = cars_f, mapping = aes(y = msrp)) +
   geom_boxplot()

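Zoom the box plot to msrp values between 0 and 100000 so the extreme outliers are out of view (coord_cartesian only changes the visible range; no rows are removed from the data)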

 # Box plot of msrp with the visible y range restricted to 0-100000.
 # coord_cartesian() zooms the view only; the box statistics are still
 # computed from every row of cars_f.
 ggplot(data = cars_f, mapping = aes(y = msrp)) +
   geom_boxplot() +
   coord_cartesian(ylim = c(0, 100000))