# A plot that has the mpg data attached but no layers.
ggplot(mpg)
# Number of rows in mpg.
nrow(mpg)
## [1] 234
# Number of columns in mpg.
ncol(mpg)
## [1] 11
# Open the help page for the mpg data set.
help(mpg)
## starting httpd help server ... done
# Scatter plot with cyl on the x axis and hwy on the y axis.
ggplot(mpg) +
  geom_point(aes(cyl, hwy))
# Scatter plot with class on the x axis and drv on the y axis.
ggplot(mpg) +
  geom_point(aes(class, drv))
# I see only a few points, and they do not form a pattern. The plot is not useful because drv and class are both categorical variables with only a few values each, so many observations fall on the same positions.
# Scatter plot of hwy against displ. The color is set outside aes(), so it
# applies to every point rather than being mapped to a variable.
ggplot(mpg) +
  geom_point(aes(displ, hwy), color = "blue")
# Print the mpg table.
print(mpg)
## # A tibble: 234 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manu… f 21 29 p comp…
## 3 audi a4 2 2008 4 manu… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manu… f 18 26 p comp…
## 7 audi a4 3.1 2008 6 auto… f 18 27 p comp…
## 8 audi a4 quattro 1.8 1999 4 manu… 4 18 26 p comp…
## 9 audi a4 quattro 1.8 1999 4 auto… 4 16 25 p comp…
## 10 audi a4 quattro 2 2008 4 manu… 4 20 28 p comp…
## # … with 224 more rows
# Map the variable cty to the color aesthetic.
ggplot(mpg) +
  geom_point(aes(displ, hwy, color = cty))
# Map cty to the size aesthetic.
ggplot(mpg) +
  geom_point(aes(displ, hwy, size = cty))
# Map cty to the shape aesthetic. This call is kept to show that it fails;
# the error it raises is recorded below.
ggplot(mpg) +
  geom_point(aes(displ, hwy, shape = cty))
## Error in `scale_f()`:
## ! A continuous variable can not be mapped to shape
# displ drives both the x position and the point size; hwy drives both the
# y position and the point color.
ggplot(mpg) +
  geom_point(aes(displ, hwy, color = hwy, size = displ))
# Fixed appearance for every point: shape 21, purple color, white fill,
# size 5 and stroke width 3.
ggplot(mpg) +
  geom_point(
    aes(displ, hwy),
    shape = 21,
    color = "purple",
    fill = "white",
    size = 5,
    stroke = 3
  )
# Color the points by the logical expression displ < 5.
ggplot(mpg, aes(displ, hwy, color = displ < 5)) +
  geom_point()
# One facet row per value of drv.
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  facet_grid(rows = vars(drv))
# One facet column per value of cyl.
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  facet_grid(cols = vars(cyl))
# Facet by class, wrapped into two rows.
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  facet_wrap(vars(class), nrow = 2)
# The same scatter plot with class mapped to color instead of facets.
ggplot(mpg) +
  geom_point(aes(displ, hwy, color = class))
# Open the help page for facet_wrap().
help(facet_wrap)
# map displ to x-axis, hwy to y-axis, and drv as color coding. geom_point creates a scatter plot for this mapping, and geom_smooth creates three smooth lines, one for each drv, without the standard-error bands.
ggplot(mpg, aes(displ, hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Data and mappings declared once in ggplot() and shared by both layers.
ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# The same data and mappings, repeated inside each layer instead.
ggplot() +
  geom_point(aes(displ, hwy), data = mpg) +
  geom_smooth(aes(displ, hwy), data = mpg)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Scatter plot of hwy against displ with a single smooth line.
ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Scatter plot with one smooth line per drv; grouping applies to the smooth
# layer only, which is added before the points.
ggplot(mpg, aes(displ, hwy)) +
  geom_smooth(aes(group = drv), se = FALSE) +
  geom_point()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# drv mapped to color for the whole plot: colored points and one colored
# smooth line per drv.
ggplot(mpg, aes(displ, hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Points colored by drv, with a single smooth line that does not use drv.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = drv)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Points colored by drv, with one smooth line per drv distinguished by
# linetype.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = drv)) +
  geom_smooth(aes(linetype = drv), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Two point layers: white points of size 4 are added first, then points
# colored by drv are added on top of them.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(color = "white", size = 4) +
  geom_point(aes(color = drv))
# geom_pointrange is the default geom of stat_summary(); stat = "summary" is added because the default stat of geom_pointrange() is identity.
ggplot(diamonds) +
  geom_pointrange(aes(cut, depth), stat = "summary")
## No summary function supplied, defaulting to `mean_se()`
# By default stat_summary uses mean_se(), i.e. the mean and the standard error, to calculate the middle point and endpoints of the line. To recreate the previous plot where min and max were the endpoints, the values for fun.min, fun.max and fun need to be defined.
# Median as the point, with min and max as the ends of the range.
ggplot(diamonds) +
  geom_pointrange(
    aes(cut, depth),
    stat = "summary",
    fun = median,
    fun.min = min,
    fun.max = max
  )
# Proportion of diamonds in each cut. Without an explicit group, geom_bar()
# computes `prop` separately within each level of `cut`, so every bar would
# have height 1. `group = 1` makes the proportions relative to all rows.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))
# Proportion of diamonds in each cut, split by color. Mapping `fill` groups
# the data by color, so `prop` would again be 1 for every cut/color piece.
# Dividing each count by the overall total gives stacked bars whose heights
# are shares of the whole data set.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = color, y = after_stat(count / sum(count)))
  )
# Scatter plot of hwy against cty with jittered point positions.
ggplot(mpg, aes(cty, hwy)) +
  geom_point(position = position_jitter())
# The geom geom_jitter() adds random variation to the locations of the points in the graph. This reduces overplotting, since two points with the same location are unlikely to receive the same random variation.
ggplot(mpg, aes(cty, hwy)) +
  geom_jitter()
# geom_count() sizes each point by the number of observations at that
# (x, y) location, so combinations with more observations appear larger
# than those with fewer.
ggplot(mpg, aes(cty, hwy)) +
  geom_count()
# With a third variable mapped to color, geom_count() is less readable
# than geom_jitter().
ggplot(mpg, aes(cty, hwy, color = class)) +
  geom_jitter()
ggplot(mpg, aes(cty, hwy, color = class)) +
  geom_count()
# A single bar of width 1 filled by drv, drawn in polar coordinates with y
# mapped to the angle of each pie section.
ggplot(mpg, aes(factor(1), fill = drv)) +
  geom_bar(width = 1) +
  coord_polar("y")
# scatter plot of hwy against cty, with a reference line added by geom_abline() and a fixed aspect ratio set by coord_fixed().
ggplot(mpg, aes(cty, hwy)) +
  geom_point() +
  # Defaults written out: the reference line is y = x.
  geom_abline(slope = 1, intercept = 0) +
  # Defaults written out: one unit on x is as long as one unit on y.
  coord_fixed(ratio = 1)