library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Fuel economy data from 1999 to 2008 for 38 popular models of cars
Description: This dataset contains a subset of the fuel economy data that the EPA makes available on https://fueleconomy.gov/. It contains only models which had a new release every year between 1999 and 2008 - this was used as a proxy for the popularity of the car.
Usage: mpg
Format: A data frame with 234 rows and 11 variables:
manufacturer: manufacturer name
model: model name
displ: engine displacement, in litres
year: year of manufacture
cyl: number of cylinders
trans: type of transmission
drv : the type of drive train, where f = front-wheel drive, r = rear wheel drive, 4 = 4wd
cty: city miles per gallon
hwy: highway miles per gallon
fl: fuel type
class: “type” of car
# Preview the first rows of the mpg dataset.
head(mpg)
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa~
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa~
## 3 audi a4 2 2008 4 manual(m6) f 20 31 p compa~
## 4 audi a4 2 2008 4 auto(av) f 21 30 p compa~
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa~
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compa~
# Scatter plot of engine displacement (displ) against highway mileage (hwy).
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy))

# General template for a ggplot2 graphic:
# ggplot(data = <DATA_FRAME>) +
#   <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))

# Scatter plot with a categorical variable on each axis (class vs. drv).
ggplot(data = mpg) +
  geom_point(mapping = aes(x = class, y = drv))
Con el parámetro `color` y dando el valor `class`, le estamos diciendo que pinte los puntos por clase de automóvil.
# Map the car class to the colour of each point.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = class))
Ahora utilizamos el parámetro `size` y nuevamente le damos el valor `class`; para este parámetro conviene que la variable sea numérica.
# Map the car class to point size. class is discrete, so ggplot2 warns that
# size is not advised for it.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, size = class))
## Warning: Using size for a discrete variable is not advised.
Para poder alterar la transparencia ocupamos el parámetro `alpha` y nuevamente ponemos el valor `class`.
# Map the car class to point transparency. class is discrete, so ggplot2
# warns that alpha is not advised for it.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, alpha = class))
## Warning: Using alpha for a discrete variable is not advised.
Utilizamos el parámetro `shape` para poder alterar la forma de los puntos u observaciones (solo permite 6 formas a la vez), por lo que los autos de la clase suv no aparecen.
# Map the car class to point shape. The default shape palette only covers six
# values, so rows of the seventh class are dropped with a warning.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, shape = class))
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).
Podemos alterar manualmente el `color` de los puntos usando los nombres en inglés de algunos colores, por ejemplo, `"red"`.
# Set a fixed colour for every point: color goes outside aes() because it is
# a constant, not a mapping to a variable.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy), color = "red")
`color` = nombre del color en formato string.
`size` = tamaño del punto en mm.
`shape` = forma del punto con números desde el 0 al 25: 0-14 son formas huecas y por tanto solo se les puede cambiar el color; 15-20 son formas rellenas de color, por tanto se les puede cambiar el color; 21-25 son formas con borde y relleno, y se les puede cambiar el color (borde) y el fill.
# Reference chart of the point shapes with codes 0 to 25 (adapted from an
# online example).
d <- data.frame(p = 0:25)
ggplot() +
  scale_y_continuous(name = "") +
  scale_x_continuous(name = "") +
  scale_shape_identity() +
  # Lay the shapes out on a grid: column = p %% 16, row = p %/% 16.
  geom_point(
    data = d,
    mapping = aes(x = p %% 16, y = p %/% 16, shape = p),
    size = 5,
    fill = "yellow"
  ) +
  # Print each shape code slightly above its symbol.
  geom_text(
    data = d,
    mapping = aes(x = p %% 16, y = p %/% 16 + 0.25, label = p),
    size = 3
  )
# Fixed (non-mapped) aesthetics: shape 23 drawn large with a red border and a
# yellow fill.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy),
    shape = 23,
    size = 10,
    color = "red",
    fill = "yellow"
  )

# An expression can be mapped as well: points are coloured by whether
# displ < 5 is TRUE or FALSE.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = displ < 5))
# facet_wrap(~<FORMULA_VARIABLE>): the variable must be discrete.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_wrap(~class, nrow = 3)

# Same plot faceted by displ, a continuous variable.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_wrap(~displ, nrow = 3)

# facet_grid(<ROWS> ~ <COLUMNS>): panels for drv (rows) crossed with cyl
# (columns).
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(drv ~ cyl)

# A "." on the row side: facet by cyl in columns only.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(. ~ cyl)

# A "." on the column side: facet by drv in rows only.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(drv ~ .)
# The same two variables drawn with different geoms: first as points.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy))

# Then as a smoothed trend line.
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# One smoothed line per drive train, distinguished by line type.
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy, linetype = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points and smoothed lines together, both coloured by drive train.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy, linetype = drv, color = drv)
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# A single smoothed line for the whole dataset.
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# One smoothed line per drive train, grouped and coloured by drv, with the
# legend explicitly requested. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy, group = drv, color = drv),
    show.legend = TRUE
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# x and y are mapped once in ggplot() and inherited by both layers; each layer
# then adds its own aesthetic (shape for the points, colour for the lines).
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(shape = class)) +
  geom_smooth(mapping = aes(color = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).
# A layer can use its own data: all points are drawn, but the smoothed line is
# fitted to the "suv" rows only. se = FALSE turns off the confidence band
# (FALSE is spelled out because F is a reassignable variable).
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(data = filter(mpg, class == "suv"), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Colour mapped globally: points and smoothed lines are both split by drv.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Mapping declared once in ggplot() and inherited by both layers.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# The same mapping repeated inside each layer instead.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  geom_smooth(mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Series of variations on the displ/hwy scatter plot. se = FALSE removes the
# confidence band; FALSE is spelled out because F is a reassignable variable.

# Points plus a single smoothed line.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# One smoothed line per drive train via the group aesthetic.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(mapping = aes(group = drv), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Colour mapped globally: both layers are split and coloured by drv.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, col = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Colour and shape mapped on the points only; one overall smoothed line.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(col = drv, shape = drv)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Example 6: as above, with one smoothed line per drv told apart by line type.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(col = drv, shape = drv)) +
  geom_smooth(mapping = aes(linetype = drv), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Shape 23 filled by drv, with a thick white border around each point.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(
    mapping = aes(fill = drv),
    size = 4,
    shape = 23,
    col = "white",
    stroke = 2
  )
Ahora utilizaremos un dataset diferente.
Prices of over 50,000 round cut diamonds
Description
A dataset containing the prices and other attributes of almost 54,000 diamonds. The variables are as follows:
Usage
diamonds
Format
A data frame with 53940 rows and 10 variables:
price: price in US dollars ($326–$18,823)
carat: weight of the diamond (0.2–5.01)
cut: quality of the cut (Fair, Good, Very Good, Premium, Ideal)
color: diamond colour, from D (best) to J (worst)
clarity: a measurement of how clear the diamond is (I1 (worst), SI2, SI1, VS2, VS1, VVS2, VVS1, IF (best))
x: length in mm (0–10.74)
y: width in mm (0–58.9)
z: depth in mm (0–31.8)
depth: total depth percentage = z / mean(x, y) = 2 * z / (x + y) (43–79)
table: width of top of diamond relative to widest point (43–95)
# Preview the first rows of the diamonds dataset.
head(diamonds)
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Bar chart of the number of diamonds per cut.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut))

# The same chart requested through the stat function instead of the geom.
ggplot(data = diamonds) +
  stat_count(mapping = aes(x = cut))
Generamos un objeto `tribble` para comprender un poco mejor.
# Pre-computed frequency of each cut, written row by row with tribble().
demo_diamonds <- tribble(
  ~cut,        ~freqs,
  "Fair",        1610,
  "Good",        4906,
  "Very Good",  12082,
  "Premium",    13791,
  "Ideal",      21551
)

# stat = "identity" uses the freqs column as the bar height instead of
# counting rows.
ggplot(data = demo_diamonds) +
  geom_bar(
    mapping = aes(x = cut, y = freqs),
    stat = "identity"
  )
# Bar chart of proportions instead of counts. after_stat(prop) reads the
# `prop` variable computed by the stat and replaces the older `..prop..`
# notation. group = 1 puts every cut in one group, so the proportions are
# relative to the whole dataset.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))
# Summarise depth for each cut: the median, with a range from the minimum to
# the maximum. The arguments are named fun, fun.min and fun.max; the older
# fun.y, fun.ymin and fun.ymax spellings are deprecated and emit warnings.
ggplot(data = diamonds) +
  stat_summary(
    mapping = aes(x = cut, y = depth),
    fun.min = min,
    fun.max = max,
    fun = median
  )
# colour maps cut to the outline of each bar.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, colour = cut))

# fill maps cut to the interior of each bar.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = cut))

# Filling by a second variable (clarity).
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = clarity))

# Filling by a second variable (color).
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = color))

# position = "identity" with semi-transparent fills.
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(alpha = 0.2, position = "identity")

# position = "identity" with outlines only (no fill).
ggplot(data = diamonds, mapping = aes(x = cut, colour = clarity)) +
  geom_bar(fill = NA, position = "identity")

# position = "fill".
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(position = "fill")

# position = "dodge".
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(position = "dodge")

# position = "jitter" on a scatter plot.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(position = "jitter")

# The same scatter plot drawn with geom_jitter().
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_jitter()
# coord_flip() swaps the roles of x and y.

# Box plot of highway mileage by class.
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot()

# The same box plot with the axes flipped.
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot() +
  coord_flip()
# Outline data for the "usa" map, drawn as filled polygons. group = group
# keeps each polygon separate.
usa <- map_data("usa")
ggplot(usa, aes(long, lat, group = group)) +
  geom_polygon(fill = "blue", color = "white") +
  coord_quickmap()

# Same approach for the "italy" map.
italy <- map_data("italy")
ggplot(italy, aes(long, lat, group = group)) +
  geom_polygon(fill = "blue", color = "white") +
  coord_quickmap()
# Bar chart of cut in polar coordinates. width = 1 makes the bars touch, the
# legend is hidden and the axis titles are removed. FALSE is spelled out
# because F is a reassignable variable.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = cut),
    show.legend = FALSE,
    width = 1
  ) +
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL) +
  coord_polar()
# City vs. highway mileage with the line drawn by geom_abline() using its
# defaults; coord_fixed() fixes the ratio between the two axes.
ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_point() +
  geom_abline() +
  coord_fixed()

# Bars of cut filled by color, in polar coordinates.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = color)) +
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL) +
  coord_polar()
# Full template of a layered ggplot2 graphic:
# ggplot(data = <DATA_FRAME>) +
#   <GEOM_FUNCTION>(
#     mapping = aes(<MAPPINGS>),
#     stat = <STAT>,
#     position = <POSITION>
#   ) +
#   <COORDINATE_FUNCTION>() +
#   <FACET_FUNCTION>()

# Final example combining a geom, a computed statistic, a coordinate system,
# facets and labels. after_stat(count) reads the count computed by the stat
# and replaces the older `..count..` notation.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = clarity, fill = clarity, y = after_stat(count))
  ) +
  coord_polar() +
  facet_wrap(~cut) +
  labs(
    x = NULL,
    y = NULL,
    title = "Ejemplo final de ggplot",
    caption = "Dos variables cruzadas de diamonds",
    subtitle = "Aprender ggplot puede ser divertido"
  )