# Load the plotting package and read the cars data set
library(ggplot2)
# Read cars.csv (path relative to the working directory) into a data frame.
cars <- read.csv(file = "cars.csv")
# Inspect the structure of the `cars` data frame
# Compact overview of cars: dimensions, column names, types and first values.
str(object = cars)
## 'data.frame': 428 obs. of 19 variables:
## $ name : chr "Chevrolet Aveo 4dr" "Chevrolet Aveo LS 4dr hatch" "Chevrolet Cavalier 2dr" "Chevrolet Cavalier 4dr" ...
## $ sports_car : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ suv : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ wagon : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ minivan : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ pickup : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ all_wheel : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ rear_wheel : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ msrp : int 11690 12585 14610 14810 16385 13670 15040 13270 13730 15460 ...
## $ dealer_cost: int 10965 11802 13697 13884 15357 12849 14086 12482 12906 14496 ...
## $ eng_size : num 1.6 1.6 2.2 2.2 2.2 2 2 2 2 2 ...
## $ ncyl : int 4 4 4 4 4 4 4 4 4 4 ...
## $ horsepwr : int 103 103 140 140 140 132 132 130 110 130 ...
## $ city_mpg : int 28 28 26 26 26 29 29 26 27 26 ...
## $ hwy_mpg : int 34 34 37 37 37 36 36 33 36 33 ...
## $ weight : int 2370 2348 2617 2676 2617 2581 2626 2612 2606 2606 ...
## $ wheel_base : int 98 98 104 104 104 105 105 103 103 103 ...
## $ length : int 167 153 183 183 183 174 174 168 168 168 ...
## $ width : int 66 66 69 68 69 67 67 67 67 67 ...
# Print the first six rows of cars (six is head()'s default row count).
head(x = cars, n = 6L)
## name sports_car suv wagon minivan pickup all_wheel
## 1 Chevrolet Aveo 4dr FALSE FALSE FALSE FALSE FALSE FALSE
## 2 Chevrolet Aveo LS 4dr hatch FALSE FALSE FALSE FALSE FALSE FALSE
## 3 Chevrolet Cavalier 2dr FALSE FALSE FALSE FALSE FALSE FALSE
## 4 Chevrolet Cavalier 4dr FALSE FALSE FALSE FALSE FALSE FALSE
## 5 Chevrolet Cavalier LS 2dr FALSE FALSE FALSE FALSE FALSE FALSE
## 6 Dodge Neon SE 4dr FALSE FALSE FALSE FALSE FALSE FALSE
## rear_wheel msrp dealer_cost eng_size ncyl horsepwr city_mpg hwy_mpg weight
## 1 FALSE 11690 10965 1.6 4 103 28 34 2370
## 2 FALSE 12585 11802 1.6 4 103 28 34 2348
## 3 FALSE 14610 13697 2.2 4 140 26 37 2617
## 4 FALSE 14810 13884 2.2 4 140 26 37 2676
## 5 FALSE 16385 15357 2.2 4 140 26 37 2617
## 6 FALSE 13670 12849 2.0 4 132 29 36 2581
## wheel_base length width
## 1 98 167 66
## 2 98 153 66
## 3 104 183 69
## 4 104 183 68
## 5 104 183 69
## 6 105 174 67
# Histogram of city_mpg in bins 5 units wide, drawn in one panel per value
# of suv. Rows whose city_mpg is not finite are dropped by stat_bin, which
# emits the warning shown below the plot.
ggplot(cars, mapping = aes(x = city_mpg)) +
  geom_histogram(
    colour = "black",
    fill = "red",
    binwidth = 5
  ) +
  facet_wrap(facets = ~ suv)
## Warning: Removed 14 rows containing non-finite values (stat_bin).
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the rows whose ncyl is 4, 6 or 8. Because %in% never returns NA,
# rows with a missing ncyl are dropped as well.
cars_f <- filter(cars, ncyl %in% c(4, 6, 8))
# Box plots of city_mpg, one box per ncyl value. ncyl is converted to a
# factor so it is treated as a discrete grouping variable.
cars_f %>%
  ggplot(mapping = aes(x = as.factor(ncyl), y = city_mpg)) +
  geom_boxplot()
## Warning: Removed 11 rows containing non-finite values (stat_boxplot).
# Overlaid density curves of city_mpg, filled by ncyl. alpha = 0.5 makes the
# fills semi-transparent so overlapping curves stay visible.
cars_f %>%
  ggplot(mapping = aes(x = city_mpg, fill = as.factor(ncyl))) +
  geom_density(alpha = 0.5)
## Warning: Removed 11 rows containing non-finite values (stat_density).
# Create a histogram of horsepwr using the default number of bins
# Histogram of horsepwr with no bin settings supplied, so stat_bin falls back
# to its default (bins = 30) and prints a message about it.
ggplot(data = cars_f, mapping = aes(x = horsepwr)) +
  geom_histogram(colour = "white", fill = "brown")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Create a histogram of horsepwr with a bin width of 3
# Histogram of horsepwr with an explicit bin width of 3.
ggplot(data = cars_f, mapping = aes(x = horsepwr)) +
  geom_histogram(
    colour = "grey",
    fill = "brown",
    binwidth = 3
  )
# Construct a box plot of msrp
# Single box plot of msrp over all rows of cars_f.
ggplot(data = cars_f, mapping = aes(y = msrp)) +
  geom_boxplot()
# Zoom the box plot to msrp values up to 100000 (coord_cartesian only hides points above the limit; no rows are removed from the data)
# Box plot of msrp with the visible y range limited to 0-100000.
# coord_cartesian() only zooms the view: the box statistics are still
# computed from every row of cars_f, including those above the limit.
ggplot(data = cars_f, mapping = aes(y = msrp)) +
  geom_boxplot() +
  coord_cartesian(ylim = c(0, 1e5))