Load packages
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Read the dataset cars.csv
# Load the cars data set from the working directory into a data frame.
cars <- read.csv(file = "cars.csv")
Data structure for ‘cars’
# Print a compact summary of the columns and their types.
utils::str(cars)
## 'data.frame': 428 obs. of 19 variables:
## $ name : chr "Chevrolet Aveo 4dr" "Chevrolet Aveo LS 4dr hatch" "Chevrolet Cavalier 2dr" "Chevrolet Cavalier 4dr" ...
## $ sports_car : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ suv : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ wagon : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ minivan : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ pickup : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ all_wheel : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ rear_wheel : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ msrp : int 11690 12585 14610 14810 16385 13670 15040 13270 13730 15460 ...
## $ dealer_cost: int 10965 11802 13697 13884 15357 12849 14086 12482 12906 14496 ...
## $ eng_size : num 1.6 1.6 2.2 2.2 2.2 2 2 2 2 2 ...
## $ ncyl : int 4 4 4 4 4 4 4 4 4 4 ...
## $ horsepwr : int 103 103 140 140 140 132 132 130 110 130 ...
## $ city_mpg : int 28 28 26 26 26 29 29 26 27 26 ...
## $ hwy_mpg : int 34 34 37 37 37 36 36 33 36 33 ...
## $ weight : int 2370 2348 2617 2676 2617 2581 2626 2612 2606 2606 ...
## $ wheel_base : int 98 98 104 104 104 105 105 103 103 103 ...
## $ length : int 167 153 183 183 183 174 174 168 168 168 ...
## $ width : int 66 66 69 68 69 67 67 67 67 67 ...
Visualize a histogram of city_mpg, faceted by whether the car is a sport utility vehicle (suv)
# Histogram of city mileage, drawn in one panel per value of the `suv` flag.
cars %>%
  ggplot(mapping = aes(x = city_mpg)) +
  geom_histogram() +
  facet_wrap(~ suv)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 14 rows containing non-finite values (stat_bin).

Keep only cars with 4, 6, or 8 cylinders
# Keep only the cars whose cylinder count is 4, 6, or 8.
#
# Use `%in%` for set membership. `ncyl == c(4, 6, 8)` recycles the length-3
# vector along the column (row 1 vs 4, row 2 vs 6, row 3 vs 8, row 4 vs 4, ...),
# which silently drops most matching cars and emits a "longer object length is
# not a multiple of shorter object length" warning. `%in%` also returns FALSE
# (never NA) for rows where `ncyl` is missing.
car <- cars %>%
  filter(ncyl %in% c(4, 6, 8))
Create box plots of city mpg by ncyl
# One box per cylinder count, with city mileage on the vertical axis.
# The grouping factor is mapped to x and the measured value to y directly,
# instead of mapping them the other way round and undoing it with
# coord_flip(); this gives the same picture without relying on ggplot2's
# automatic orientation detection.
car %>%
  ggplot(aes(x = as.factor(ncyl), y = city_mpg)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

Create overlaid density plots for same data
# Overlaid, semi-transparent density curves of city mileage, one per
# cylinder count.
car %>%
  ggplot(mapping = aes(x = city_mpg, fill = as.factor(ncyl))) +
  geom_density(alpha = 0.4)
## Warning: Removed 3 rows containing non-finite values (stat_density).

Create a histogram of horsepwr
# Histogram of horsepower using the default binning.
car %>%
  ggplot(mapping = aes(x = horsepwr)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Create hist of horsepwr with binwidth of 3
# Histogram of horsepower with bins that are 3 units wide.
ggplot(data = car, mapping = aes(x = horsepwr)) +
  geom_histogram(binwidth = 3)

Construct box plot of msrp
# Single vertical box plot of msrp.
# msrp is mapped to y with a constant x, rather than mapped to x and rotated
# with coord_flip(); this uses the same mapping as the outlier-filtered box
# plot later in the document, so the two plots are directly comparable.
car %>%
  ggplot(aes(x = 1, y = msrp)) +
  geom_boxplot()

Exclude outliers (cars with an msrp of 100000 or more) and redraw the box plot
# Box plot of msrp restricted to cars priced below 100000.
ggplot(
  data = filter(car, msrp < 100000),
  mapping = aes(x = 1, y = msrp)
) +
  geom_boxplot()
