There are multiple plotting systems in R:
powerful and flexible
Compare the plots from base graphics and ggplot2:
# Draw 100 standard-normal values and plot them with base graphics.
# rnorm() takes the number of observations; passing 1:100 only worked
# because a vector argument falls back to its length.
x <- rnorm(100)
hist(x)
# The same sample as a ggplot2 histogram with default settings.
ggplot(data = data.frame(x), mapping = aes(x = x)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# The same histogram with a custom fill, titles, a minimal theme and two
# reference lines: a solid baseline at y = 0 and a dashed line at mean(x).
ggplot(data = data.frame(x), mapping = aes(x = x)) +
  geom_histogram(bins = 30, fill = "#c0392b") +
  labs(
    title = "Distribution of Some Important Data",
    subtitle = "An interesting insight into my data",
    caption = "My plot 1",
    x = "# of Entries in Listicle",
    y = "# of Listicles"
  ) +
  theme_minimal() +
  geom_hline(yintercept = 0, colour = "black", size = 0.4) +
  geom_vline(mapping = aes(xintercept = mean(x)), linetype = "dashed")
data.frames
Load the library:
library(ggplot2)
If you do not have it, install using:
# Install ggplot2 only when it is missing, so that re-running this script
# does not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Open the help page of the mpg data set, then print its structure.
help("mpg")
str(object = mpg)
## Classes 'tbl_df', 'tbl' and 'data.frame': 234 obs. of 11 variables:
## $ manufacturer: chr "audi" "audi" "audi" "audi" ...
## $ model : chr "a4" "a4" "a4" "a4" ...
## $ displ : num 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr "f" "f" "f" "f" ...
## $ cty : int 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr "p" "p" "p" "p" ...
## $ class : chr "compact" "compact" "compact" "compact" ...
# Per-column summary of mpg.
summary(object = mpg)
## manufacturer model displ year
## Length:234 Length:234 Min. :1.600 Min. :1999
## Class :character Class :character 1st Qu.:2.400 1st Qu.:1999
## Mode :character Mode :character Median :3.300 Median :2004
## Mean :3.472 Mean :2004
## 3rd Qu.:4.600 3rd Qu.:2008
## Max. :7.000 Max. :2008
## cyl trans drv cty
## Min. :4.000 Length:234 Length:234 Min. : 9.00
## 1st Qu.:4.000 Class :character Class :character 1st Qu.:14.00
## Median :6.000 Mode :character Mode :character Median :17.00
## Mean :5.889 Mean :16.86
## 3rd Qu.:8.000 3rd Qu.:19.00
## Max. :8.000 Max. :35.00
## hwy fl class
## Min. :12.00 Length:234 Length:234
## 1st Qu.:18.00 Class :character Class :character
## Median :24.00 Mode :character Mode :character
## Mean :23.44
## 3rd Qu.:27.00
## Max. :44.00
Columns:
manufacturer - car manufacturer
model - car model
displ - engine displacement, in litres
year - year of manufacture
cyl - number of cylinders
trans - type of transmission
drv - f = front-wheel drive, r = rear wheel drive, 4 = 4wd
cty - city miles per gallon
hwy - highway miles per gallon
fl - fuel type
class - “type” of car
Sample:
# Render the first 20 rows of mpg as a markdown table.
knitr::kable(x = head(mpg, n = 20), format = "markdown")
| manufacturer | model | displ | year | cyl | trans | drv | cty | hwy | fl | class |
|---|---|---|---|---|---|---|---|---|---|---|
| audi | a4 | 1.8 | 1999 | 4 | auto(l5) | f | 18 | 29 | p | compact |
| audi | a4 | 1.8 | 1999 | 4 | manual(m5) | f | 21 | 29 | p | compact |
| audi | a4 | 2.0 | 2008 | 4 | manual(m6) | f | 20 | 31 | p | compact |
| audi | a4 | 2.0 | 2008 | 4 | auto(av) | f | 21 | 30 | p | compact |
| audi | a4 | 2.8 | 1999 | 6 | auto(l5) | f | 16 | 26 | p | compact |
| audi | a4 | 2.8 | 1999 | 6 | manual(m5) | f | 18 | 26 | p | compact |
| audi | a4 | 3.1 | 2008 | 6 | auto(av) | f | 18 | 27 | p | compact |
| audi | a4 quattro | 1.8 | 1999 | 4 | manual(m5) | 4 | 18 | 26 | p | compact |
| audi | a4 quattro | 1.8 | 1999 | 4 | auto(l5) | 4 | 16 | 25 | p | compact |
| audi | a4 quattro | 2.0 | 2008 | 4 | manual(m6) | 4 | 20 | 28 | p | compact |
| audi | a4 quattro | 2.0 | 2008 | 4 | auto(s6) | 4 | 19 | 27 | p | compact |
| audi | a4 quattro | 2.8 | 1999 | 6 | auto(l5) | 4 | 15 | 25 | p | compact |
| audi | a4 quattro | 2.8 | 1999 | 6 | manual(m5) | 4 | 17 | 25 | p | compact |
| audi | a4 quattro | 3.1 | 2008 | 6 | auto(s6) | 4 | 17 | 25 | p | compact |
| audi | a4 quattro | 3.1 | 2008 | 6 | manual(m6) | 4 | 15 | 25 | p | compact |
| audi | a6 quattro | 2.8 | 1999 | 6 | auto(l5) | 4 | 15 | 24 | p | midsize |
| audi | a6 quattro | 3.1 | 2008 | 6 | auto(s6) | 4 | 17 | 25 | p | midsize |
| audi | a6 quattro | 4.2 | 2008 | 8 | auto(s6) | 4 | 16 | 23 | p | midsize |
| chevrolet | c1500 suburban 2wd | 5.3 | 2008 | 8 | auto(l4) | r | 14 | 20 | r | suv |
| chevrolet | c1500 suburban 2wd | 5.3 | 2008 | 8 | auto(l4) | r | 11 | 15 | e | suv |
# A plot object that has data and aesthetics but no geom layer yet.
p <- ggplot(mpg, mapping = aes(x = displ, y = hwy))
p

# Add a point layer: hwy against displ.
p <- ggplot(mpg, mapping = aes(displ, hwy))
p + geom_point()

# Same plot; `data` is passed by name this time.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p + geom_point()
# Colour mapped to the character column `class`.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p + geom_point(mapping = aes(colour = class))

# Colour mapped to `cyl` as stored (an integer column).
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p + geom_point(mapping = aes(colour = cyl))

# Colour mapped to `cyl` converted to a factor.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p + geom_point(mapping = aes(colour = factor(cyl)))

# Point size mapped to `cyl`.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p + geom_point(mapping = aes(size = cyl))

# Two aesthetics at once: colour by class, size by cyl.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p + geom_point(mapping = aes(size = cyl, colour = class))

# Shape mapped to factor(cyl); size = 4 sits outside aes(), so it is a
# constant for every point rather than a mapping.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p + geom_point(mapping = aes(shape = factor(cyl)), size = 4)
# Three stacked point layers: a black disc, a slightly smaller white disc
# drawn over it, and finally the shape-coded points on top.
p <- ggplot(mpg, aes(x = displ, y = hwy))
p +
  geom_point(colour = "black", size = 4.5) +
  geom_point(colour = "white", size = 4) +
  geom_point(aes(shape = factor(cyl)))

# Same layers, but the two constant-colour layers are explicitly requested
# in the legend. TRUE is spelled out because T is an ordinary variable that
# can be reassigned.
p <- ggplot(mpg, aes(x = displ, y = hwy))
p +
  geom_point(colour = "black", size = 4.5, show.legend = TRUE) +
  geom_point(colour = "white", size = 4, show.legend = TRUE) +
  geom_point(aes(shape = factor(cyl)))
# Points plus a single line through all observations.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p +
  geom_point() +
  geom_line()

# Only the points are coloured by factor(cyl); the line layer has no
# colour mapping.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p +
  geom_point(mapping = aes(colour = factor(cyl))) +
  geom_line()

# Both layers carry the factor(cyl) colour mapping.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p +
  geom_point(mapping = aes(colour = factor(cyl))) +
  geom_line(mapping = aes(colour = factor(cyl)))

# The colour mapping is declared once in ggplot(), so every layer
# inherits it ...
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = factor(cyl)))
p +
  geom_point() +
  geom_line()
# ... unless a layer sets a constant colour of its own.
p +
  geom_point(colour = "black") +
  geom_line()
# hwy against cyl drawn with plain points.
p <- ggplot(data = mpg, mapping = aes(x = cyl, y = hwy))
p + geom_point()

# The same data drawn with geom_jitter() instead of geom_point().
p <- ggplot(data = mpg, mapping = aes(x = cyl, y = hwy))
p + geom_jitter()

# Jittered points coloured by class.
p <- ggplot(data = mpg, mapping = aes(x = cyl, y = hwy))
p + geom_jitter(mapping = aes(colour = class))
# One box per class for hwy.
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot()

# hwy and cty boxes drawn as two separate layers on the same panel.
ggplot(data = mpg) +
  geom_boxplot(mapping = aes(x = class, y = hwy)) +
  geom_boxplot(mapping = aes(x = class, y = cty))

# The same two layers, told apart by outline colour.
ggplot(data = mpg) +
  geom_boxplot(mapping = aes(x = class, y = hwy), colour = "blue") +
  geom_boxplot(mapping = aes(x = class, y = cty), colour = "red")

# Boxes filled by year (as a factor).
ggplot(data = mpg, mapping = aes(x = class, y = hwy, fill = factor(year))) +
  geom_boxplot()

# Classes reordered on the x axis by their median hwy.
ggplot(
  data = mpg,
  mapping = aes(x = reorder(class, hwy, median), y = hwy, fill = factor(year))
) +
  geom_boxplot()
# Print class as a factor whose levels are ordered by the median hwy of
# each class.
reorder(x = mpg$class, X = mpg$hwy, FUN = median)
## [1] compact compact compact compact compact compact
## [7] compact compact compact compact compact compact
## [13] compact compact compact midsize midsize midsize
## [19] suv suv suv suv suv 2seater
## [25] 2seater 2seater 2seater 2seater suv suv
## [31] suv suv midsize midsize midsize midsize
## [37] midsize minivan minivan minivan minivan minivan
## [43] minivan minivan minivan minivan minivan minivan
## [49] pickup pickup pickup pickup pickup pickup
## [55] pickup pickup pickup suv suv suv
## [61] suv suv suv suv pickup pickup
## [67] pickup pickup pickup pickup pickup pickup
## [73] pickup pickup suv suv suv suv
## [79] suv suv suv suv suv pickup
## [85] pickup pickup pickup pickup pickup pickup
## [91] subcompact subcompact subcompact subcompact subcompact subcompact
## [97] subcompact subcompact subcompact subcompact subcompact subcompact
## [103] subcompact subcompact subcompact subcompact subcompact subcompact
## [109] midsize midsize midsize midsize midsize midsize
## [115] midsize subcompact subcompact subcompact subcompact subcompact
## [121] subcompact subcompact suv suv suv suv
## [127] suv suv suv suv suv suv
## [133] suv suv suv suv suv suv
## [139] suv suv suv compact compact midsize
## [145] midsize midsize midsize midsize midsize midsize
## [151] suv suv suv suv midsize midsize
## [157] midsize midsize midsize suv suv suv
## [163] suv suv suv subcompact subcompact subcompact
## [169] subcompact compact compact compact compact suv
## [175] suv suv suv suv suv midsize
## [181] midsize midsize midsize midsize midsize midsize
## [187] compact compact compact compact compact compact
## [193] compact compact compact compact compact compact
## [199] suv suv pickup pickup pickup pickup
## [205] pickup pickup pickup compact compact compact
## [211] compact compact compact compact compact compact
## [217] compact compact compact compact compact subcompact
## [223] subcompact subcompact subcompact subcompact subcompact midsize
## [229] midsize midsize midsize midsize midsize midsize
## attr(,"scores")
## 2seater compact midsize minivan pickup subcompact
## 25.0 27.0 27.0 23.0 17.0 26.0
## suv
## 17.5
## Levels: pickup suv minivan 2seater subcompact compact midsize
# The same reordering, with the columns looked up inside mpg via with().
with(data = mpg, expr = reorder(class, hwy, median))
# Bar chart of class using geom_bar()'s default statistic.
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar()

# Bar height is the mean of hwy within each class.
# NOTE(review): `fun.y` was deprecated in favour of `fun` in ggplot2 3.3.0;
# kept as-is here - confirm the minimum ggplot2 version before switching.
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_bar(stat = "summary", fun.y = mean)
# Reshape mpg into long format: one row per (class, val, type), where
# `type` records whether `val` came from the hwy or the cty column.
hwy <- mpg[, c("class", "hwy")]
hwy <- cbind(hwy, "hwy")
names(hwy) <- c("class", "val", "type")

cty <- mpg[, c("class", "cty")]
cty <- cbind(cty, "cty")
names(cty) <- c("class", "val", "type")

# Stack the hwy rows on top of the cty rows.
ds <- rbind(hwy, cty)

# Show 10 randomly chosen rows. seq_len() avoids the 1:0 pitfall that
# 1:nrow(ds) has for an empty data frame.
ds[sample(seq_len(nrow(ds)), 10), ]
## class val type
## 146 midsize 27 hwy
## 410 suv 15 cty
## 344 midsize 18 cty
## 58 suv 17 hwy
## 381 midsize 19 cty
## 316 suv 13 cty
## 290 pickup 11 cty
## 355 subcompact 16 cty
## 165 suv 23 hwy
## 382 midsize 18 cty
# Mean val per class, filled by type, with the default bar position.
ggplot(data = ds, mapping = aes(x = class, y = val, fill = type)) +
  geom_bar(stat = "summary", fun.y = mean)

# The same bars placed side by side with position = "dodge".
ggplot(data = ds, mapping = aes(x = class, y = val, fill = type)) +
  geom_bar(stat = "summary", fun.y = mean, position = "dodge")
# position_identity - default of most geoms
# position_jitter - default of geom_jitter
# position_dodge - default of geom_boxplot
# position_stack - default of geom_bar==geom_histogram and geom_area
# position_fill - useful for geom_bar==geom_histogram and geom_area

# For every class, compute mean_cl_boot() of val separately for the cty
# and hwy rows, and tag each one-row result with its type and class.
ds2 <- lapply(unique(ds$class), function(x) {
  v <- ds[ds$class == x, ]

  cty <- mean_cl_boot(v[v$type == "cty", "val"])
  cty["type"] <- "cty"

  hwy <- mean_cl_boot(v[v$type == "hwy", "val"])
  hwy["type"] <- "hwy"

  df <- rbind(cty, hwy)
  df["class"] <- x
  df
})

# Bind the per-class pieces into a single data frame and print it.
ds2 <- do.call(rbind, ds2)
ds2
## y ymin ymax type class
## 1 20.12766 19.25479 21.10691 cty compact
## 2 28.29787 27.27606 29.51117 hwy compact
## 3 18.75610 18.17073 19.31707 cty midsize
## 4 27.29268 26.68293 27.90244 hwy midsize
## 5 13.50000 12.90323 14.09718 cty suv
## 6 18.12903 17.40323 18.88750 hwy suv
## 7 15.40000 15.00000 15.80000 cty 2seater
## 8 24.80000 23.80000 25.80000 hwy 2seater
## 9 15.81818 14.72500 16.63636 cty minivan
## 10 22.36364 21.00000 23.36591 hwy minivan
## 11 13.00000 12.36364 13.69697 cty pickup
## 12 16.87879 16.06061 17.60606 hwy pickup
## 13 20.37143 18.85643 21.91500 cty subcompact
## 14 28.14286 26.42786 30.05714 hwy subcompact
# Bars and error bars drawn from the precomputed y / ymin / ymax columns
# of ds2.
ggplot(data = ds2, mapping = aes(x = class, y = y, fill = type)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    mapping = aes(ymin = ymin, ymax = ymax),
    position = position_dodge(width = 0.9),
    width = 0.2
  )

# The same chart computed on the fly from ds: bar heights via the summary
# stat with mean, error bars via stat_summary() with mean_cl_boot.
ggplot(data = ds, mapping = aes(x = class, y = val, fill = type)) +
  geom_bar(stat = "summary", fun.y = mean, position = "dodge") +
  stat_summary(
    fun.data = mean_cl_boot,
    geom = "errorbar",
    colour = "black",
    position = position_dodge(width = 0.9),
    width = 0.2
  )
# Label every point with its row name (the car model in mtcars).
ggplot(data = mtcars, mapping = aes(x = mpg, y = wt)) +
  geom_point() +
  geom_text(mapping = aes(label = rownames(mtcars)))

# Same labels at a fixed, smaller text size.
ggplot(data = mtcars, mapping = aes(x = mpg, y = wt)) +
  geom_point() +
  geom_text(mapping = aes(label = rownames(mtcars)), size = 2)

# Label size mapped to wt.
ggplot(data = mtcars, mapping = aes(x = mpg, y = wt)) +
  geom_point() +
  geom_text(mapping = aes(label = rownames(mtcars), size = wt))

# A single free-standing text annotation at fixed coordinates.
ggplot(data = mtcars, mapping = aes(x = mpg, y = wt)) +
  geom_point() +
  annotate(
    geom = "text",
    label = "plot mpg vs. wt",
    x = 2,
    y = 5,
    size = 8,
    colour = "red"
  )
# Bars built from the raw cty values, with one white text label per row
# of mpg.
ggplot(data = mpg, mapping = aes(x = class, y = cty)) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(label = cty), colour = "white")

# The individual cty values for the "2seater" class.
mpg[mpg$class == "2seater", "cty"]
## # A tibble: 5 × 1
## cty
## <int>
## 1 16
## 2 15
## 3 16
## 4 15
## 5 15
# Mean cty per class, one row per class.
# Rows of ds are selected with ds's own class column. The earlier mask
# built from mpg$class was only half as long as ds and relied on silent
# recycling to line up with the stacked hwy/cty rows.
ds2 <- lapply(unique(ds$class), function(x) {
  v <- ds[ds$class == x, ]
  cty <- mean(v[v$type == "cty", "val"])
  data.frame(class = x, cty)
})

# Bind the per-class rows into a single data frame and print it.
ds2 <- do.call(rbind, ds2)
ds2
## class cty
## 1 compact 20.12766
## 2 midsize 18.75610
## 3 suv 13.50000
## 4 2seater 15.40000
## 5 minivan 15.81818
## 6 pickup 13.00000
## 7 subcompact 20.37143
# One bar per class at the mean cty, labelled with the value rounded to
# two digits; vjust = 2 shifts each label vertically.
ggplot(data = ds2, mapping = aes(x = class, y = cty)) +
  geom_bar(stat = "identity") +
  geom_text(
    mapping = aes(label = round(cty, digits = 2)),
    colour = "white",
    vjust = 2
  )
# Titles, caption and axis labels set through labs().
# The title previously misspelled "displacement".
p <- ggplot(mpg, aes(x = displ, y = hwy))
p +
  geom_point(aes(color = class, size = cyl)) +
  labs(
    title = "Fuel economy in relation to engine displacement",
    subtitle = "Less is more",
    caption = "A caption",
    x = "engine displacement",
    y = "highway miles per gallon"
  )

# Same plot, with the colour and size legend titles overridden as well.
p <- ggplot(mpg, aes(x = displ, y = hwy))
p +
  geom_point(aes(color = class, size = cyl)) +
  labs(
    title = "Fuel economy in relation to engine displacement",
    subtitle = "Less is more",
    caption = "A caption",
    x = "engine displacement",
    y = "highway miles per gallon"
  ) +
  guides(
    color = guide_legend(title = "Car class"),
    size = guide_legend(title = "Number of cylinders")
  )
# One bar per model, with the theme's default x-axis labels.
p <- ggplot(data = mpg, mapping = aes(x = model, y = hwy))
p + geom_bar(stat = "identity")

# x-axis labels rotated by 90 degrees.
p <- ggplot(data = mpg, mapping = aes(x = model, y = hwy))
p +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90))

# Rotated labels with hjust = 1 added.
p <- ggplot(data = mpg, mapping = aes(x = model, y = hwy))
p +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(hjust = 1, angle = 90))
library(ggthemes)
# Reference plot with the default theme.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p + geom_point(mapping = aes(colour = class, size = cyl))

# theme_economist() with its matching colour scale.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p +
  geom_point(mapping = aes(colour = class, size = cyl)) +
  theme_economist() +
  scale_colour_economist()

# theme_minimal().
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p +
  geom_point(mapping = aes(colour = class, size = cyl)) +
  theme_minimal()

# theme_excel() with its matching colour scale.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p +
  geom_point(mapping = aes(colour = class, size = cyl)) +
  theme_excel() +
  scale_colour_excel()
# Scatter plot with a smoother fitted by method = "lm".
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p +
  geom_point() +
  geom_smooth(method = "lm")

# The colour mapping is declared in ggplot(), so both the points and the
# "lm" smoother inherit factor(cyl).
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = factor(cyl)))
p +
  geom_point() +
  geom_smooth(method = "lm")

# Smoother fitted by method = "loess" instead.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p +
  geom_point() +
  geom_smooth(method = "loess")
qplot - quick plot
plot()
A formula with the rows (of the tabular display) on the LHS and the columns (of the tabular display) on the RHS; the dot in the formula is used to indicate there should be no faceting on this dimension (either row or column).
y ~ x
x is the explanatory variable
y is the response variable
y ~ x + z
# Facet formula `. ~ cyl`: cyl on the column side, nothing on the row side.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()
p + facet_grid(. ~ cyl)

# Facet formula `cyl ~ .`: cyl on the row side, nothing on the column side.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()
p + facet_grid(cyl ~ .)

# Rows by drv, columns by cyl.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()
p + facet_grid(drv ~ cyl)

# mtcars faceted by cyl with scales = "free".
p <- ggplot(data = mtcars, mapping = aes(x = mpg, y = wt, colour = factor(cyl))) +
  geom_point()
p + facet_grid(. ~ cyl, scales = "free")

# Models by drive type, one facet row per manufacturer, with both scales
# and space set to "free" and the strip text drawn at angle 0.
ggplot(data = mpg, mapping = aes(x = drv, y = model)) +
  geom_point() +
  facet_grid(manufacturer ~ ., scales = "free", space = "free") +
  theme(strip.text.y = element_text(angle = 0))
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:Hmisc':
##
## subplot
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Build the ggplot, convert it with ggplotly(), keep the result in `gg`
# and print it.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
p <- p + geom_point(mapping = aes(colour = class, size = cyl))
gg <- ggplotly(p)
gg