library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
# Load the fuel-economy data set that ships with ggplot2, then inspect its
# dimensions and the type of every column.
data("mpg", package = "ggplot2")
dim(mpg)
## [1] 234 11
str(mpg)
## Classes 'tbl_df', 'tbl' and 'data.frame': 234 obs. of 11 variables:
## $ manufacturer: chr "audi" "audi" "audi" "audi" ...
## $ model : chr "a4" "a4" "a4" "a4" ...
## $ displ : num 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr "f" "f" "f" "f" ...
## $ cty : int 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr "p" "p" "p" "p" ...
## $ class : chr "compact" "compact" "compact" "compact" ...
# Render the whole data frame (all 234 rows) as a formatted table.
knitr::kable(mpg)
| audi |
a4 |
1.8 |
1999 |
4 |
auto(l5) |
f |
18 |
29 |
p |
compact |
| audi |
a4 |
1.8 |
1999 |
4 |
manual(m5) |
f |
21 |
29 |
p |
compact |
| audi |
a4 |
2.0 |
2008 |
4 |
manual(m6) |
f |
20 |
31 |
p |
compact |
| audi |
a4 |
2.0 |
2008 |
4 |
auto(av) |
f |
21 |
30 |
p |
compact |
| audi |
a4 |
2.8 |
1999 |
6 |
auto(l5) |
f |
16 |
26 |
p |
compact |
| audi |
a4 |
2.8 |
1999 |
6 |
manual(m5) |
f |
18 |
26 |
p |
compact |
| audi |
a4 |
3.1 |
2008 |
6 |
auto(av) |
f |
18 |
27 |
p |
compact |
| audi |
a4 quattro |
1.8 |
1999 |
4 |
manual(m5) |
4 |
18 |
26 |
p |
compact |
| audi |
a4 quattro |
1.8 |
1999 |
4 |
auto(l5) |
4 |
16 |
25 |
p |
compact |
| audi |
a4 quattro |
2.0 |
2008 |
4 |
manual(m6) |
4 |
20 |
28 |
p |
compact |
| audi |
a4 quattro |
2.0 |
2008 |
4 |
auto(s6) |
4 |
19 |
27 |
p |
compact |
| audi |
a4 quattro |
2.8 |
1999 |
6 |
auto(l5) |
4 |
15 |
25 |
p |
compact |
| audi |
a4 quattro |
2.8 |
1999 |
6 |
manual(m5) |
4 |
17 |
25 |
p |
compact |
| audi |
a4 quattro |
3.1 |
2008 |
6 |
auto(s6) |
4 |
17 |
25 |
p |
compact |
| audi |
a4 quattro |
3.1 |
2008 |
6 |
manual(m6) |
4 |
15 |
25 |
p |
compact |
| audi |
a6 quattro |
2.8 |
1999 |
6 |
auto(l5) |
4 |
15 |
24 |
p |
midsize |
| audi |
a6 quattro |
3.1 |
2008 |
6 |
auto(s6) |
4 |
17 |
25 |
p |
midsize |
| audi |
a6 quattro |
4.2 |
2008 |
8 |
auto(s6) |
4 |
16 |
23 |
p |
midsize |
| chevrolet |
c1500 suburban 2wd |
5.3 |
2008 |
8 |
auto(l4) |
r |
14 |
20 |
r |
suv |
| chevrolet |
c1500 suburban 2wd |
5.3 |
2008 |
8 |
auto(l4) |
r |
11 |
15 |
e |
suv |
| chevrolet |
c1500 suburban 2wd |
5.3 |
2008 |
8 |
auto(l4) |
r |
14 |
20 |
r |
suv |
| chevrolet |
c1500 suburban 2wd |
5.7 |
1999 |
8 |
auto(l4) |
r |
13 |
17 |
r |
suv |
| chevrolet |
c1500 suburban 2wd |
6.0 |
2008 |
8 |
auto(l4) |
r |
12 |
17 |
r |
suv |
| chevrolet |
corvette |
5.7 |
1999 |
8 |
manual(m6) |
r |
16 |
26 |
p |
2seater |
| chevrolet |
corvette |
5.7 |
1999 |
8 |
auto(l4) |
r |
15 |
23 |
p |
2seater |
| chevrolet |
corvette |
6.2 |
2008 |
8 |
manual(m6) |
r |
16 |
26 |
p |
2seater |
| chevrolet |
corvette |
6.2 |
2008 |
8 |
auto(s6) |
r |
15 |
25 |
p |
2seater |
| chevrolet |
corvette |
7.0 |
2008 |
8 |
manual(m6) |
r |
15 |
24 |
p |
2seater |
| chevrolet |
k1500 tahoe 4wd |
5.3 |
2008 |
8 |
auto(l4) |
4 |
14 |
19 |
r |
suv |
| chevrolet |
k1500 tahoe 4wd |
5.3 |
2008 |
8 |
auto(l4) |
4 |
11 |
14 |
e |
suv |
| chevrolet |
k1500 tahoe 4wd |
5.7 |
1999 |
8 |
auto(l4) |
4 |
11 |
15 |
r |
suv |
| chevrolet |
k1500 tahoe 4wd |
6.5 |
1999 |
8 |
auto(l4) |
4 |
14 |
17 |
d |
suv |
| chevrolet |
malibu |
2.4 |
1999 |
4 |
auto(l4) |
f |
19 |
27 |
r |
midsize |
| chevrolet |
malibu |
2.4 |
2008 |
4 |
auto(l4) |
f |
22 |
30 |
r |
midsize |
| chevrolet |
malibu |
3.1 |
1999 |
6 |
auto(l4) |
f |
18 |
26 |
r |
midsize |
| chevrolet |
malibu |
3.5 |
2008 |
6 |
auto(l4) |
f |
18 |
29 |
r |
midsize |
| chevrolet |
malibu |
3.6 |
2008 |
6 |
auto(s6) |
f |
17 |
26 |
r |
midsize |
| dodge |
caravan 2wd |
2.4 |
1999 |
4 |
auto(l3) |
f |
18 |
24 |
r |
minivan |
| dodge |
caravan 2wd |
3.0 |
1999 |
6 |
auto(l4) |
f |
17 |
24 |
r |
minivan |
| dodge |
caravan 2wd |
3.3 |
1999 |
6 |
auto(l4) |
f |
16 |
22 |
r |
minivan |
| dodge |
caravan 2wd |
3.3 |
1999 |
6 |
auto(l4) |
f |
16 |
22 |
r |
minivan |
| dodge |
caravan 2wd |
3.3 |
2008 |
6 |
auto(l4) |
f |
17 |
24 |
r |
minivan |
| dodge |
caravan 2wd |
3.3 |
2008 |
6 |
auto(l4) |
f |
17 |
24 |
r |
minivan |
| dodge |
caravan 2wd |
3.3 |
2008 |
6 |
auto(l4) |
f |
11 |
17 |
e |
minivan |
| dodge |
caravan 2wd |
3.8 |
1999 |
6 |
auto(l4) |
f |
15 |
22 |
r |
minivan |
| dodge |
caravan 2wd |
3.8 |
1999 |
6 |
auto(l4) |
f |
15 |
21 |
r |
minivan |
| dodge |
caravan 2wd |
3.8 |
2008 |
6 |
auto(l6) |
f |
16 |
23 |
r |
minivan |
| dodge |
caravan 2wd |
4.0 |
2008 |
6 |
auto(l6) |
f |
16 |
23 |
r |
minivan |
| dodge |
dakota pickup 4wd |
3.7 |
2008 |
6 |
manual(m6) |
4 |
15 |
19 |
r |
pickup |
| dodge |
dakota pickup 4wd |
3.7 |
2008 |
6 |
auto(l4) |
4 |
14 |
18 |
r |
pickup |
| dodge |
dakota pickup 4wd |
3.9 |
1999 |
6 |
auto(l4) |
4 |
13 |
17 |
r |
pickup |
| dodge |
dakota pickup 4wd |
3.9 |
1999 |
6 |
manual(m5) |
4 |
14 |
17 |
r |
pickup |
| dodge |
dakota pickup 4wd |
4.7 |
2008 |
8 |
auto(l5) |
4 |
14 |
19 |
r |
pickup |
| dodge |
dakota pickup 4wd |
4.7 |
2008 |
8 |
auto(l5) |
4 |
14 |
19 |
r |
pickup |
| dodge |
dakota pickup 4wd |
4.7 |
2008 |
8 |
auto(l5) |
4 |
9 |
12 |
e |
pickup |
| dodge |
dakota pickup 4wd |
5.2 |
1999 |
8 |
manual(m5) |
4 |
11 |
17 |
r |
pickup |
| dodge |
dakota pickup 4wd |
5.2 |
1999 |
8 |
auto(l4) |
4 |
11 |
15 |
r |
pickup |
| dodge |
durango 4wd |
3.9 |
1999 |
6 |
auto(l4) |
4 |
13 |
17 |
r |
suv |
| dodge |
durango 4wd |
4.7 |
2008 |
8 |
auto(l5) |
4 |
13 |
17 |
r |
suv |
| dodge |
durango 4wd |
4.7 |
2008 |
8 |
auto(l5) |
4 |
9 |
12 |
e |
suv |
| dodge |
durango 4wd |
4.7 |
2008 |
8 |
auto(l5) |
4 |
13 |
17 |
r |
suv |
| dodge |
durango 4wd |
5.2 |
1999 |
8 |
auto(l4) |
4 |
11 |
16 |
r |
suv |
| dodge |
durango 4wd |
5.7 |
2008 |
8 |
auto(l5) |
4 |
13 |
18 |
r |
suv |
| dodge |
durango 4wd |
5.9 |
1999 |
8 |
auto(l4) |
4 |
11 |
15 |
r |
suv |
| dodge |
ram 1500 pickup 4wd |
4.7 |
2008 |
8 |
manual(m6) |
4 |
12 |
16 |
r |
pickup |
| dodge |
ram 1500 pickup 4wd |
4.7 |
2008 |
8 |
auto(l5) |
4 |
9 |
12 |
e |
pickup |
| dodge |
ram 1500 pickup 4wd |
4.7 |
2008 |
8 |
auto(l5) |
4 |
13 |
17 |
r |
pickup |
| dodge |
ram 1500 pickup 4wd |
4.7 |
2008 |
8 |
auto(l5) |
4 |
13 |
17 |
r |
pickup |
| dodge |
ram 1500 pickup 4wd |
4.7 |
2008 |
8 |
manual(m6) |
4 |
12 |
16 |
r |
pickup |
| dodge |
ram 1500 pickup 4wd |
4.7 |
2008 |
8 |
manual(m6) |
4 |
9 |
12 |
e |
pickup |
| dodge |
ram 1500 pickup 4wd |
5.2 |
1999 |
8 |
auto(l4) |
4 |
11 |
15 |
r |
pickup |
| dodge |
ram 1500 pickup 4wd |
5.2 |
1999 |
8 |
manual(m5) |
4 |
11 |
16 |
r |
pickup |
| dodge |
ram 1500 pickup 4wd |
5.7 |
2008 |
8 |
auto(l5) |
4 |
13 |
17 |
r |
pickup |
| dodge |
ram 1500 pickup 4wd |
5.9 |
1999 |
8 |
auto(l4) |
4 |
11 |
15 |
r |
pickup |
| ford |
expedition 2wd |
4.6 |
1999 |
8 |
auto(l4) |
r |
11 |
17 |
r |
suv |
| ford |
expedition 2wd |
5.4 |
1999 |
8 |
auto(l4) |
r |
11 |
17 |
r |
suv |
| ford |
expedition 2wd |
5.4 |
2008 |
8 |
auto(l6) |
r |
12 |
18 |
r |
suv |
| ford |
explorer 4wd |
4.0 |
1999 |
6 |
auto(l5) |
4 |
14 |
17 |
r |
suv |
| ford |
explorer 4wd |
4.0 |
1999 |
6 |
manual(m5) |
4 |
15 |
19 |
r |
suv |
| ford |
explorer 4wd |
4.0 |
1999 |
6 |
auto(l5) |
4 |
14 |
17 |
r |
suv |
| ford |
explorer 4wd |
4.0 |
2008 |
6 |
auto(l5) |
4 |
13 |
19 |
r |
suv |
| ford |
explorer 4wd |
4.6 |
2008 |
8 |
auto(l6) |
4 |
13 |
19 |
r |
suv |
| ford |
explorer 4wd |
5.0 |
1999 |
8 |
auto(l4) |
4 |
13 |
17 |
r |
suv |
| ford |
f150 pickup 4wd |
4.2 |
1999 |
6 |
auto(l4) |
4 |
14 |
17 |
r |
pickup |
| ford |
f150 pickup 4wd |
4.2 |
1999 |
6 |
manual(m5) |
4 |
14 |
17 |
r |
pickup |
| ford |
f150 pickup 4wd |
4.6 |
1999 |
8 |
manual(m5) |
4 |
13 |
16 |
r |
pickup |
| ford |
f150 pickup 4wd |
4.6 |
1999 |
8 |
auto(l4) |
4 |
13 |
16 |
r |
pickup |
| ford |
f150 pickup 4wd |
4.6 |
2008 |
8 |
auto(l4) |
4 |
13 |
17 |
r |
pickup |
| ford |
f150 pickup 4wd |
5.4 |
1999 |
8 |
auto(l4) |
4 |
11 |
15 |
r |
pickup |
| ford |
f150 pickup 4wd |
5.4 |
2008 |
8 |
auto(l4) |
4 |
13 |
17 |
r |
pickup |
| ford |
mustang |
3.8 |
1999 |
6 |
manual(m5) |
r |
18 |
26 |
r |
subcompact |
| ford |
mustang |
3.8 |
1999 |
6 |
auto(l4) |
r |
18 |
25 |
r |
subcompact |
| ford |
mustang |
4.0 |
2008 |
6 |
manual(m5) |
r |
17 |
26 |
r |
subcompact |
| ford |
mustang |
4.0 |
2008 |
6 |
auto(l5) |
r |
16 |
24 |
r |
subcompact |
| ford |
mustang |
4.6 |
1999 |
8 |
auto(l4) |
r |
15 |
21 |
r |
subcompact |
| ford |
mustang |
4.6 |
1999 |
8 |
manual(m5) |
r |
15 |
22 |
r |
subcompact |
| ford |
mustang |
4.6 |
2008 |
8 |
manual(m5) |
r |
15 |
23 |
r |
subcompact |
| ford |
mustang |
4.6 |
2008 |
8 |
auto(l5) |
r |
15 |
22 |
r |
subcompact |
| ford |
mustang |
5.4 |
2008 |
8 |
manual(m6) |
r |
14 |
20 |
p |
subcompact |
| honda |
civic |
1.6 |
1999 |
4 |
manual(m5) |
f |
28 |
33 |
r |
subcompact |
| honda |
civic |
1.6 |
1999 |
4 |
auto(l4) |
f |
24 |
32 |
r |
subcompact |
| honda |
civic |
1.6 |
1999 |
4 |
manual(m5) |
f |
25 |
32 |
r |
subcompact |
| honda |
civic |
1.6 |
1999 |
4 |
manual(m5) |
f |
23 |
29 |
p |
subcompact |
| honda |
civic |
1.6 |
1999 |
4 |
auto(l4) |
f |
24 |
32 |
r |
subcompact |
| honda |
civic |
1.8 |
2008 |
4 |
manual(m5) |
f |
26 |
34 |
r |
subcompact |
| honda |
civic |
1.8 |
2008 |
4 |
auto(l5) |
f |
25 |
36 |
r |
subcompact |
| honda |
civic |
1.8 |
2008 |
4 |
auto(l5) |
f |
24 |
36 |
c |
subcompact |
| honda |
civic |
2.0 |
2008 |
4 |
manual(m6) |
f |
21 |
29 |
p |
subcompact |
| hyundai |
sonata |
2.4 |
1999 |
4 |
auto(l4) |
f |
18 |
26 |
r |
midsize |
| hyundai |
sonata |
2.4 |
1999 |
4 |
manual(m5) |
f |
18 |
27 |
r |
midsize |
| hyundai |
sonata |
2.4 |
2008 |
4 |
auto(l4) |
f |
21 |
30 |
r |
midsize |
| hyundai |
sonata |
2.4 |
2008 |
4 |
manual(m5) |
f |
21 |
31 |
r |
midsize |
| hyundai |
sonata |
2.5 |
1999 |
6 |
auto(l4) |
f |
18 |
26 |
r |
midsize |
| hyundai |
sonata |
2.5 |
1999 |
6 |
manual(m5) |
f |
18 |
26 |
r |
midsize |
| hyundai |
sonata |
3.3 |
2008 |
6 |
auto(l5) |
f |
19 |
28 |
r |
midsize |
| hyundai |
tiburon |
2.0 |
1999 |
4 |
auto(l4) |
f |
19 |
26 |
r |
subcompact |
| hyundai |
tiburon |
2.0 |
1999 |
4 |
manual(m5) |
f |
19 |
29 |
r |
subcompact |
| hyundai |
tiburon |
2.0 |
2008 |
4 |
manual(m5) |
f |
20 |
28 |
r |
subcompact |
| hyundai |
tiburon |
2.0 |
2008 |
4 |
auto(l4) |
f |
20 |
27 |
r |
subcompact |
| hyundai |
tiburon |
2.7 |
2008 |
6 |
auto(l4) |
f |
17 |
24 |
r |
subcompact |
| hyundai |
tiburon |
2.7 |
2008 |
6 |
manual(m6) |
f |
16 |
24 |
r |
subcompact |
| hyundai |
tiburon |
2.7 |
2008 |
6 |
manual(m5) |
f |
17 |
24 |
r |
subcompact |
| jeep |
grand cherokee 4wd |
3.0 |
2008 |
6 |
auto(l5) |
4 |
17 |
22 |
d |
suv |
| jeep |
grand cherokee 4wd |
3.7 |
2008 |
6 |
auto(l5) |
4 |
15 |
19 |
r |
suv |
| jeep |
grand cherokee 4wd |
4.0 |
1999 |
6 |
auto(l4) |
4 |
15 |
20 |
r |
suv |
| jeep |
grand cherokee 4wd |
4.7 |
1999 |
8 |
auto(l4) |
4 |
14 |
17 |
r |
suv |
| jeep |
grand cherokee 4wd |
4.7 |
2008 |
8 |
auto(l5) |
4 |
9 |
12 |
e |
suv |
| jeep |
grand cherokee 4wd |
4.7 |
2008 |
8 |
auto(l5) |
4 |
14 |
19 |
r |
suv |
| jeep |
grand cherokee 4wd |
5.7 |
2008 |
8 |
auto(l5) |
4 |
13 |
18 |
r |
suv |
| jeep |
grand cherokee 4wd |
6.1 |
2008 |
8 |
auto(l5) |
4 |
11 |
14 |
p |
suv |
| land rover |
range rover |
4.0 |
1999 |
8 |
auto(l4) |
4 |
11 |
15 |
p |
suv |
| land rover |
range rover |
4.2 |
2008 |
8 |
auto(s6) |
4 |
12 |
18 |
r |
suv |
| land rover |
range rover |
4.4 |
2008 |
8 |
auto(s6) |
4 |
12 |
18 |
r |
suv |
| land rover |
range rover |
4.6 |
1999 |
8 |
auto(l4) |
4 |
11 |
15 |
p |
suv |
| lincoln |
navigator 2wd |
5.4 |
1999 |
8 |
auto(l4) |
r |
11 |
17 |
r |
suv |
| lincoln |
navigator 2wd |
5.4 |
1999 |
8 |
auto(l4) |
r |
11 |
16 |
p |
suv |
| lincoln |
navigator 2wd |
5.4 |
2008 |
8 |
auto(l6) |
r |
12 |
18 |
r |
suv |
| mercury |
mountaineer 4wd |
4.0 |
1999 |
6 |
auto(l5) |
4 |
14 |
17 |
r |
suv |
| mercury |
mountaineer 4wd |
4.0 |
2008 |
6 |
auto(l5) |
4 |
13 |
19 |
r |
suv |
| mercury |
mountaineer 4wd |
4.6 |
2008 |
8 |
auto(l6) |
4 |
13 |
19 |
r |
suv |
| mercury |
mountaineer 4wd |
5.0 |
1999 |
8 |
auto(l4) |
4 |
13 |
17 |
r |
suv |
| nissan |
altima |
2.4 |
1999 |
4 |
manual(m5) |
f |
21 |
29 |
r |
compact |
| nissan |
altima |
2.4 |
1999 |
4 |
auto(l4) |
f |
19 |
27 |
r |
compact |
| nissan |
altima |
2.5 |
2008 |
4 |
auto(av) |
f |
23 |
31 |
r |
midsize |
| nissan |
altima |
2.5 |
2008 |
4 |
manual(m6) |
f |
23 |
32 |
r |
midsize |
| nissan |
altima |
3.5 |
2008 |
6 |
manual(m6) |
f |
19 |
27 |
p |
midsize |
| nissan |
altima |
3.5 |
2008 |
6 |
auto(av) |
f |
19 |
26 |
p |
midsize |
| nissan |
maxima |
3.0 |
1999 |
6 |
auto(l4) |
f |
18 |
26 |
r |
midsize |
| nissan |
maxima |
3.0 |
1999 |
6 |
manual(m5) |
f |
19 |
25 |
r |
midsize |
| nissan |
maxima |
3.5 |
2008 |
6 |
auto(av) |
f |
19 |
25 |
p |
midsize |
| nissan |
pathfinder 4wd |
3.3 |
1999 |
6 |
auto(l4) |
4 |
14 |
17 |
r |
suv |
| nissan |
pathfinder 4wd |
3.3 |
1999 |
6 |
manual(m5) |
4 |
15 |
17 |
r |
suv |
| nissan |
pathfinder 4wd |
4.0 |
2008 |
6 |
auto(l5) |
4 |
14 |
20 |
p |
suv |
| nissan |
pathfinder 4wd |
5.6 |
2008 |
8 |
auto(s5) |
4 |
12 |
18 |
p |
suv |
| pontiac |
grand prix |
3.1 |
1999 |
6 |
auto(l4) |
f |
18 |
26 |
r |
midsize |
| pontiac |
grand prix |
3.8 |
1999 |
6 |
auto(l4) |
f |
16 |
26 |
p |
midsize |
| pontiac |
grand prix |
3.8 |
1999 |
6 |
auto(l4) |
f |
17 |
27 |
r |
midsize |
| pontiac |
grand prix |
3.8 |
2008 |
6 |
auto(l4) |
f |
18 |
28 |
r |
midsize |
| pontiac |
grand prix |
5.3 |
2008 |
8 |
auto(s4) |
f |
16 |
25 |
p |
midsize |
| subaru |
forester awd |
2.5 |
1999 |
4 |
manual(m5) |
4 |
18 |
25 |
r |
suv |
| subaru |
forester awd |
2.5 |
1999 |
4 |
auto(l4) |
4 |
18 |
24 |
r |
suv |
| subaru |
forester awd |
2.5 |
2008 |
4 |
manual(m5) |
4 |
20 |
27 |
r |
suv |
| subaru |
forester awd |
2.5 |
2008 |
4 |
manual(m5) |
4 |
19 |
25 |
p |
suv |
| subaru |
forester awd |
2.5 |
2008 |
4 |
auto(l4) |
4 |
20 |
26 |
r |
suv |
| subaru |
forester awd |
2.5 |
2008 |
4 |
auto(l4) |
4 |
18 |
23 |
p |
suv |
| subaru |
impreza awd |
2.2 |
1999 |
4 |
auto(l4) |
4 |
21 |
26 |
r |
subcompact |
| subaru |
impreza awd |
2.2 |
1999 |
4 |
manual(m5) |
4 |
19 |
26 |
r |
subcompact |
| subaru |
impreza awd |
2.5 |
1999 |
4 |
manual(m5) |
4 |
19 |
26 |
r |
subcompact |
| subaru |
impreza awd |
2.5 |
1999 |
4 |
auto(l4) |
4 |
19 |
26 |
r |
subcompact |
| subaru |
impreza awd |
2.5 |
2008 |
4 |
auto(s4) |
4 |
20 |
25 |
p |
compact |
| subaru |
impreza awd |
2.5 |
2008 |
4 |
auto(s4) |
4 |
20 |
27 |
r |
compact |
| subaru |
impreza awd |
2.5 |
2008 |
4 |
manual(m5) |
4 |
19 |
25 |
p |
compact |
| subaru |
impreza awd |
2.5 |
2008 |
4 |
manual(m5) |
4 |
20 |
27 |
r |
compact |
| toyota |
4runner 4wd |
2.7 |
1999 |
4 |
manual(m5) |
4 |
15 |
20 |
r |
suv |
| toyota |
4runner 4wd |
2.7 |
1999 |
4 |
auto(l4) |
4 |
16 |
20 |
r |
suv |
| toyota |
4runner 4wd |
3.4 |
1999 |
6 |
auto(l4) |
4 |
15 |
19 |
r |
suv |
| toyota |
4runner 4wd |
3.4 |
1999 |
6 |
manual(m5) |
4 |
15 |
17 |
r |
suv |
| toyota |
4runner 4wd |
4.0 |
2008 |
6 |
auto(l5) |
4 |
16 |
20 |
r |
suv |
| toyota |
4runner 4wd |
4.7 |
2008 |
8 |
auto(l5) |
4 |
14 |
17 |
r |
suv |
| toyota |
camry |
2.2 |
1999 |
4 |
manual(m5) |
f |
21 |
29 |
r |
midsize |
| toyota |
camry |
2.2 |
1999 |
4 |
auto(l4) |
f |
21 |
27 |
r |
midsize |
| toyota |
camry |
2.4 |
2008 |
4 |
manual(m5) |
f |
21 |
31 |
r |
midsize |
| toyota |
camry |
2.4 |
2008 |
4 |
auto(l5) |
f |
21 |
31 |
r |
midsize |
| toyota |
camry |
3.0 |
1999 |
6 |
auto(l4) |
f |
18 |
26 |
r |
midsize |
| toyota |
camry |
3.0 |
1999 |
6 |
manual(m5) |
f |
18 |
26 |
r |
midsize |
| toyota |
camry |
3.5 |
2008 |
6 |
auto(s6) |
f |
19 |
28 |
r |
midsize |
| toyota |
camry solara |
2.2 |
1999 |
4 |
auto(l4) |
f |
21 |
27 |
r |
compact |
| toyota |
camry solara |
2.2 |
1999 |
4 |
manual(m5) |
f |
21 |
29 |
r |
compact |
| toyota |
camry solara |
2.4 |
2008 |
4 |
manual(m5) |
f |
21 |
31 |
r |
compact |
| toyota |
camry solara |
2.4 |
2008 |
4 |
auto(s5) |
f |
22 |
31 |
r |
compact |
| toyota |
camry solara |
3.0 |
1999 |
6 |
auto(l4) |
f |
18 |
26 |
r |
compact |
| toyota |
camry solara |
3.0 |
1999 |
6 |
manual(m5) |
f |
18 |
26 |
r |
compact |
| toyota |
camry solara |
3.3 |
2008 |
6 |
auto(s5) |
f |
18 |
27 |
r |
compact |
| toyota |
corolla |
1.8 |
1999 |
4 |
auto(l3) |
f |
24 |
30 |
r |
compact |
| toyota |
corolla |
1.8 |
1999 |
4 |
auto(l4) |
f |
24 |
33 |
r |
compact |
| toyota |
corolla |
1.8 |
1999 |
4 |
manual(m5) |
f |
26 |
35 |
r |
compact |
| toyota |
corolla |
1.8 |
2008 |
4 |
manual(m5) |
f |
28 |
37 |
r |
compact |
| toyota |
corolla |
1.8 |
2008 |
4 |
auto(l4) |
f |
26 |
35 |
r |
compact |
| toyota |
land cruiser wagon 4wd |
4.7 |
1999 |
8 |
auto(l4) |
4 |
11 |
15 |
r |
suv |
| toyota |
land cruiser wagon 4wd |
5.7 |
2008 |
8 |
auto(s6) |
4 |
13 |
18 |
r |
suv |
| toyota |
toyota tacoma 4wd |
2.7 |
1999 |
4 |
manual(m5) |
4 |
15 |
20 |
r |
pickup |
| toyota |
toyota tacoma 4wd |
2.7 |
1999 |
4 |
auto(l4) |
4 |
16 |
20 |
r |
pickup |
| toyota |
toyota tacoma 4wd |
2.7 |
2008 |
4 |
manual(m5) |
4 |
17 |
22 |
r |
pickup |
| toyota |
toyota tacoma 4wd |
3.4 |
1999 |
6 |
manual(m5) |
4 |
15 |
17 |
r |
pickup |
| toyota |
toyota tacoma 4wd |
3.4 |
1999 |
6 |
auto(l4) |
4 |
15 |
19 |
r |
pickup |
| toyota |
toyota tacoma 4wd |
4.0 |
2008 |
6 |
manual(m6) |
4 |
15 |
18 |
r |
pickup |
| toyota |
toyota tacoma 4wd |
4.0 |
2008 |
6 |
auto(l5) |
4 |
16 |
20 |
r |
pickup |
| volkswagen |
gti |
2.0 |
1999 |
4 |
manual(m5) |
f |
21 |
29 |
r |
compact |
| volkswagen |
gti |
2.0 |
1999 |
4 |
auto(l4) |
f |
19 |
26 |
r |
compact |
| volkswagen |
gti |
2.0 |
2008 |
4 |
manual(m6) |
f |
21 |
29 |
p |
compact |
| volkswagen |
gti |
2.0 |
2008 |
4 |
auto(s6) |
f |
22 |
29 |
p |
compact |
| volkswagen |
gti |
2.8 |
1999 |
6 |
manual(m5) |
f |
17 |
24 |
r |
compact |
| volkswagen |
jetta |
1.9 |
1999 |
4 |
manual(m5) |
f |
33 |
44 |
d |
compact |
| volkswagen |
jetta |
2.0 |
1999 |
4 |
manual(m5) |
f |
21 |
29 |
r |
compact |
| volkswagen |
jetta |
2.0 |
1999 |
4 |
auto(l4) |
f |
19 |
26 |
r |
compact |
| volkswagen |
jetta |
2.0 |
2008 |
4 |
auto(s6) |
f |
22 |
29 |
p |
compact |
| volkswagen |
jetta |
2.0 |
2008 |
4 |
manual(m6) |
f |
21 |
29 |
p |
compact |
| volkswagen |
jetta |
2.5 |
2008 |
5 |
auto(s6) |
f |
21 |
29 |
r |
compact |
| volkswagen |
jetta |
2.5 |
2008 |
5 |
manual(m5) |
f |
21 |
29 |
r |
compact |
| volkswagen |
jetta |
2.8 |
1999 |
6 |
auto(l4) |
f |
16 |
23 |
r |
compact |
| volkswagen |
jetta |
2.8 |
1999 |
6 |
manual(m5) |
f |
17 |
24 |
r |
compact |
| volkswagen |
new beetle |
1.9 |
1999 |
4 |
manual(m5) |
f |
35 |
44 |
d |
subcompact |
| volkswagen |
new beetle |
1.9 |
1999 |
4 |
auto(l4) |
f |
29 |
41 |
d |
subcompact |
| volkswagen |
new beetle |
2.0 |
1999 |
4 |
manual(m5) |
f |
21 |
29 |
r |
subcompact |
| volkswagen |
new beetle |
2.0 |
1999 |
4 |
auto(l4) |
f |
19 |
26 |
r |
subcompact |
| volkswagen |
new beetle |
2.5 |
2008 |
5 |
manual(m5) |
f |
20 |
28 |
r |
subcompact |
| volkswagen |
new beetle |
2.5 |
2008 |
5 |
auto(s6) |
f |
20 |
29 |
r |
subcompact |
| volkswagen |
passat |
1.8 |
1999 |
4 |
manual(m5) |
f |
21 |
29 |
p |
midsize |
| volkswagen |
passat |
1.8 |
1999 |
4 |
auto(l5) |
f |
18 |
29 |
p |
midsize |
| volkswagen |
passat |
2.0 |
2008 |
4 |
auto(s6) |
f |
19 |
28 |
p |
midsize |
| volkswagen |
passat |
2.0 |
2008 |
4 |
manual(m6) |
f |
21 |
29 |
p |
midsize |
| volkswagen |
passat |
2.8 |
1999 |
6 |
auto(l5) |
f |
16 |
26 |
p |
midsize |
| volkswagen |
passat |
2.8 |
1999 |
6 |
manual(m5) |
f |
18 |
26 |
p |
midsize |
| volkswagen |
passat |
3.6 |
2008 |
6 |
auto(s6) |
f |
17 |
26 |
p |
midsize |
# Flag the character columns. vapply() guarantees a logical vector, whereas
# sapply() can silently change its return type.
char_var <- vapply(mpg, is.character, logical(1))
# Convert every flagged column to a factor in place.
mpg[, char_var] <- lapply(mpg[, char_var], as.factor)
# Confirm the new column types.
str(mpg)
## Classes 'tbl_df', 'tbl' and 'data.frame': 234 obs. of 11 variables:
## $ manufacturer: Factor w/ 15 levels "audi","chevrolet",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ model : Factor w/ 38 levels "4runner 4wd",..: 2 2 2 2 2 2 2 3 3 3 ...
## $ displ : num 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : Factor w/ 10 levels "auto(av)","auto(l3)",..: 4 9 10 1 4 9 1 9 4 10 ...
## $ drv : Factor w/ 3 levels "4","f","r": 2 2 2 2 2 2 2 1 1 1 ...
## $ cty : int 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : Factor w/ 5 levels "c","d","e","p",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ class : Factor w/ 7 levels "2seater","compact",..: 2 2 2 2 2 2 2 2 2 2 ...
Discretization
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(arules)
## Warning: package 'arules' was built under R version 3.5.3
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
# Bin engine displacement into three equal-frequency groups.
mpg$displ_grp <- discretize(
  mpg$displ,
  method = "frequency",
  breaks = 3,
  labels = c("low", "medium", "high")
)

# Summarize each bin: mean displacement, number of rows, and value range.
mpg %>%
  group_by(displ_grp) %>%
  summarise(
    avg_displ = mean(displ),
    count = n(),
    min = min(displ),
    max = max(displ)
  )
## # A tibble: 3 x 5
## displ_grp avg_displ count min max
## <fct> <dbl> <int> <dbl> <dbl>
## 1 low 2.02 62 1.6 2.4
## 2 medium 3.06 86 2.5 3.9
## 3 high 4.93 86 4 7
Data Normalization and Standardization:
# Standardize displ to z-scores. scale() centers and scales by default, so
# both arguments are left implicit.
# Note: scale() returns a one-column matrix, so displ_scale is stored as a
# matrix column; that is why its summary below is labelled "V1".
mpg$displ_scale <- scale(mpg$displ)
# Location and spread of the raw values ...
summary(mpg$displ)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.600 2.400 3.300 3.472 4.600 7.000
sd(mpg$displ)
## [1] 1.291959
# ... and of the standardized values (mean 0, standard deviation 1).
summary(mpg$displ_scale)
## V1
## Min. :-1.4488
## 1st Qu.:-0.8296
## Median :-0.1330
## Mean : 0.0000
## 3rd Qu.: 0.8733
## Max. : 2.7309
sd(mpg$displ_scale)
## [1] 1
Dealing with Data Outliers and Noise (1):
Before removing outliers:
# Box plot of highway mileage before any outlier treatment.
boxplot(mpg$hwy)

# Replace every value that boxplot.stats() flags as an outlier with the
# median of hwy, then redraw the box plot.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
mpg$hwy[mpg$hwy %in% boxplot.stats(mpg$hwy)$out] <-
  median(mpg$hwy, na.rm = TRUE)
boxplot(mpg$hwy)

Dealing with Data Outliers and Noise (2):
After removing outliers:
# Replace every value that boxplot.stats() flags as an outlier with the
# median of hwy, then draw the box plot of the treated values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
mpg$hwy[mpg$hwy %in% boxplot.stats(mpg$hwy)$out] <-
  median(mpg$hwy, na.rm = TRUE)
boxplot(mpg$hwy)

Remove Duplicate Data Records:
# Total number of rows.
nrow(mpg)
## [1] 234
# Rows left after dropping exact duplicates (base R).
nrow(mpg[!duplicated(mpg), ])
## [1] 225
# The same count with dplyr. TRUE/FALSE are spelled out because T and F are
# ordinary variables that can be reassigned.
mpg %>%
  distinct(.keep_all = TRUE) %>%
  nrow()
## [1] 225
# Show every row that has a duplicate: scanning from both ends marks the
# first occurrence as well as the later copies.
mpg[duplicated(mpg) | duplicated(mpg, fromLast = TRUE), ]
## # A tibble: 18 x 13
## manufacturer model displ year cyl trans drv cty hwy fl class
## <fct> <fct> <dbl> <int> <int> <fct> <fct> <int> <dbl> <fct> <fct>
## 1 chevrolet c150~ 5.3 2008 8 auto~ r 14 20 r suv
## 2 chevrolet c150~ 5.3 2008 8 auto~ r 14 20 r suv
## 3 dodge cara~ 3.3 1999 6 auto~ f 16 22 r mini~
## 4 dodge cara~ 3.3 1999 6 auto~ f 16 22 r mini~
## 5 dodge cara~ 3.3 2008 6 auto~ f 17 24 r mini~
## 6 dodge cara~ 3.3 2008 6 auto~ f 17 24 r mini~
## 7 dodge dako~ 4.7 2008 8 auto~ 4 14 19 r pick~
## 8 dodge dako~ 4.7 2008 8 auto~ 4 14 19 r pick~
## 9 dodge dura~ 4.7 2008 8 auto~ 4 13 17 r suv
## 10 dodge dura~ 4.7 2008 8 auto~ 4 13 17 r suv
## 11 dodge ram ~ 4.7 2008 8 manu~ 4 12 16 r pick~
## 12 dodge ram ~ 4.7 2008 8 auto~ 4 13 17 r pick~
## 13 dodge ram ~ 4.7 2008 8 auto~ 4 13 17 r pick~
## 14 dodge ram ~ 4.7 2008 8 manu~ 4 12 16 r pick~
## 15 ford expl~ 4 1999 6 auto~ 4 14 17 r suv
## 16 ford expl~ 4 1999 6 auto~ 4 14 17 r suv
## 17 honda civic 1.6 1999 4 auto~ f 24 32 r subc~
## 18 honda civic 1.6 1999 4 auto~ f 24 32 r subc~
## # ... with 2 more variables: displ_grp <fct>, displ_scale[,1] <dbl>
Aggregation:
Goal: output the ten manufacturers with the best average city fuel economy:
# Rank manufacturers by mean city mileage and keep the ten highest.
mpg %>%
  group_by(manufacturer) %>%
  summarise(avg_cty = mean(cty)) %>%
  arrange(desc(avg_cty)) %>%
  head(n = 10)
## # A tibble: 10 x 2
## manufacturer avg_cty
## <fct> <dbl>
## 1 honda 24.4
## 2 volkswagen 20.9
## 3 subaru 19.3
## 4 hyundai 18.6
## 5 toyota 18.5
## 6 nissan 18.1
## 7 audi 17.6
## 8 pontiac 17
## 9 chevrolet 15
## 10 ford 14
Random Data Sampling without Replacement:
# Draw a simple random sample of 30% of the row indices without replacement.
# seq_len() stays correct for a zero-row table (1:0 would yield c(1, 0)), and
# floor() makes the truncation of the fractional sample size explicit.
mpg_sample_index <- sample(
  seq_len(nrow(mpg)),
  size = floor(nrow(mpg) * 0.3),
  replace = FALSE
)
# Share of each drive type within the sample.
prop.table(table(mpg[mpg_sample_index, "drv"]))
##
## 4 f r
## 0.4000000 0.4857143 0.1142857
# The sampled rows themselves.
mpg[mpg_sample_index, ]
## # A tibble: 70 x 13
## manufacturer model displ year cyl trans drv cty hwy fl class
## <fct> <fct> <dbl> <int> <int> <fct> <fct> <int> <dbl> <fct> <fct>
## 1 subaru fore~ 2.5 2008 4 manu~ 4 19 25 p suv
## 2 volkswagen jetta 2 1999 4 manu~ f 21 29 r comp~
## 3 ford must~ 4 2008 6 auto~ r 16 24 r subc~
## 4 lincoln navi~ 5.4 1999 8 auto~ r 11 17 r suv
## 5 subaru impr~ 2.5 1999 4 auto~ 4 19 26 r subc~
## 6 honda civic 2 2008 4 manu~ f 21 29 p subc~
## 7 chevrolet corv~ 7 2008 8 manu~ r 15 24 p 2sea~
## 8 dodge dako~ 4.7 2008 8 auto~ 4 9 12 e pick~
## 9 volkswagen gti 2 2008 4 manu~ f 21 29 p comp~
## 10 chevrolet corv~ 5.7 1999 8 auto~ r 15 23 p 2sea~
## # ... with 60 more rows, and 2 more variables: displ_grp <fct>,
## # displ_scale[,1] <dbl>
Stratified Data Sampling:
library(caret)
## Warning: package 'caret' was built under R version 3.5.3
## Loading required package: lattice
# Draw a 30% sample stratified on drv. createDataPartition() samples within
# each level of the factor, so the sample keeps the class balance of the data.
# [, 1] turns the one-column index matrix into a plain integer vector.
mpg_strat_index <- createDataPartition(mpg$drv, p = 0.3, list = FALSE)[, 1]
mpg_strat <- mpg[mpg_strat_index, ]
# Class proportions in the full data: the reference that the proportions in
# the stratified sample, prop.table(table(mpg_strat$drv)), should closely match.
prop.table(table(mpg$drv))
##
## 4 f r
## 0.4401709 0.4529915 0.1068376
Missing Value Manipulations (1):
Check whether the dataset contains missing values. If it does not, randomly set some values to missing, and then count how many missing values each variable has.
# Number of rows with at least one missing value (none to begin with).
sum(!complete.cases(mpg))
## [1] 0
# Blank out five randomly chosen hwy values to create missing data.
# seq_along() replaces 1:length(), and FALSE is spelled out instead of F.
mpg$hwy[sample(seq_along(mpg$hwy), size = 5, replace = FALSE)] <- NA
sum(!complete.cases(mpg))
## [1] 5
# Count the missing values per column; vapply() pins the result to one
# integer per column.
vapply(mpg, function(x) sum(is.na(x)), integer(1))
## manufacturer model displ year cyl
## 0 0 0 0 0
## trans drv cty hwy fl
## 0 0 0 5 0
## class displ_grp displ_scale
## 0 0 0
Missing Value Manipulations (2)
Simple imputation method for missing value: replace with mean of the attribute:
# Work on a copy so the original missing values stay available.
mpg2 <- mpg
sum(!complete.cases(mpg2))
## [1] 5
# Fill each missing hwy value with the mean of the observed values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
mpg2$hwy[is.na(mpg2$hwy)] <- mean(mpg2$hwy, na.rm = TRUE)
# No incomplete rows should remain.
sum(!complete.cases(mpg2))
## [1] 0
More advanced missing value imputation algorithms: kNN imputation:
library(caret)
# Estimate a preprocessing recipe (kNN imputation plus centering and scaling)
# from mpg.
preprocess <- preProcess(
  x = mpg,
  method = c("knnImpute", "center", "scale")
)
# Apply the recipe to the same data to obtain an imputed copy.
mpg3 <- predict(preprocess, newdata = mpg)
# Number of rows that still contain a missing value.
sum(!complete.cases(mpg3))
## [1] 0
Visualization: Box Plot:
# Use the black-and-white theme for this and all following plots.
theme_set(theme_bw())
# Highway mileage by displacement group, without vertical grid lines.
ggplot(data = mpg, mapping = aes(displ_grp, hwy)) +
  geom_boxplot() +
  theme(panel.grid.major.x = element_blank())
## Warning: Removed 5 rows containing non-finite values (stat_boxplot).

Visualization: Scatter Plot (and its Variants):
# City vs. highway mileage: point colour encodes the cylinder count, point
# size the displacement, and a linear fit is overlaid.
ggplot(data = mpg, mapping = aes(hwy, cty)) +
  geom_point(mapping = aes(color = as.factor(cyl), size = displ)) +
  geom_smooth(method = "lm")
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing missing values (geom_point).

Visualization: Pie Chart:
library(scales)
## Warning: package 'scales' was built under R version 3.5.3
# Pie chart of the share of cars in each displacement group.
# The share is kept numeric so that the slice angles are proportional to it.
# percent() returns character strings, which would put y on a discrete scale,
# so it is used only to format the slice labels.
mpg %>%
  group_by(displ_grp) %>%
  summarise(pct = n() / nrow(mpg)) %>%
  ggplot(aes(x = factor(1), y = pct, fill = displ_grp)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = percent(pct)),
            position = position_stack(vjust = 0.5)) +
  coord_polar(theta = "y") +
  theme(axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank())

Visualization: Ordered Bar Plot:
# Mean city mileage per manufacturer, sorted from best to worst.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
mpg_by_maker <- mpg %>%
  group_by(manufacturer) %>%
  summarize(avg_cty = mean(cty, na.rm = TRUE)) %>%
  arrange(desc(avg_cty))
# Re-level the factor in the sorted order so the bars are drawn in that
# order rather than alphabetically.
mpg_by_maker$manufacturer <- factor(mpg_by_maker$manufacturer,
                                    levels = mpg_by_maker$manufacturer)
ggplot(mpg_by_maker, aes(x = manufacturer, y = avg_cty)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        panel.grid.major.x = element_blank())

Visualization: Density Plot:
# Density of city mileage, one filled curve per cylinder count.
ggplot(data = mpg, mapping = aes(x = cty)) +
  geom_density(mapping = aes(fill = factor(cyl))) +
  labs(
    title = "Density plot",
    subtitle = "City Mileage Grouped by Number of cylinders",
    caption = "Source: mpg",
    x = "City Mileage",
    fill = "# Cylinders"
  )

Visualization: Heatmap:
# Heatmap of highway mileage by model year and displacement group.
# The data are first reduced to one value per cell (the mean hwy, ignoring
# missing values). Drawing a tile for every raw row would stack many tiles
# in the same cell, so only the last one drawn would be visible.
mpg %>%
  group_by(year, displ_grp) %>%
  summarise(avg_hwy = mean(hwy, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = as.factor(year), y = displ_grp)) +
  geom_tile(aes(fill = avg_hwy), color = "white") +
  scale_fill_gradient(low = "red", high = "green") +
  theme(axis.ticks = element_blank()) +
  labs(x = "Year", y = "Engine Power", fill = "hwy")

Visualization: Faceting:
# City vs. highway mileage in a grid of panels: one row per model year and
# one column per displacement group.
ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_point() +
  facet_grid(rows = vars(year), cols = vars(displ_grp))
## Warning: Removed 5 rows containing missing values (geom_point).
