Cargando librerías

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Usando uno de los datasets que vienen por defecto haremos varias representaciones

Fuel economy data from 1999 to 2008 for 38 popular models of cars

Description This dataset contains a subset of the fuel economy data that the EPA makes available on https://fueleconomy.gov/. It contains only models which had a new release every year between 1999 and 2008 - this was used as a proxy for the popularity of the car.

Usage: mpg. Format: a data frame with 234 rows and 11 variables:

manufacturer: manufacturer name

model: model name

displ: engine displacement, in litres

year: year of manufacture

cyl: number of cylinders

trans: type of transmission

drv : the type of drive train, where f = front-wheel drive, r = rear wheel drive, 4 = 4wd

cty: city miles per gallon

hwy: highway miles per gallon

fl: fuel type

class: “type” of car

# Preview the first rows of the mpg dataset.
head(mpg)
## # A tibble: 6 x 11
##   manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class 
##   <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr> 
## 1 audi         a4      1.8  1999     4 auto(l5)   f        18    29 p     compa~
## 2 audi         a4      1.8  1999     4 manual(m5) f        21    29 p     compa~
## 3 audi         a4      2    2008     4 manual(m6) f        20    31 p     compa~
## 4 audi         a4      2    2008     4 auto(av)   f        21    30 p     compa~
## 5 audi         a4      2.8  1999     6 auto(l5)   f        16    26 p     compa~
## 6 audi         a4      2.8  1999     6 manual(m5) f        18    26 p     compa~
# Scatter plot: engine displacement (x) against highway mileage (y).
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy)
  )

Básicamente, esta es la fórmula para hacer representaciones con ggplot2:

#ggplot(data = <DATA_FRAME>) +
#  <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))

# Scatter plot of two character columns: car class (x) and drive train (y).
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = class, y = drv)
  )

Color de los puntos

Con el parámetro color y dándole el valor class, le estamos diciendo que pinte los puntos según la clase de automóvil.

# Map the colour aesthetic to class: one colour per car class.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, color = class)
  )

Tamaño de los puntos

Ahora utilizamos el parámetro size y nuevamente le damos el valor class. Para este parámetro conviene que la variable sea numérica; como class es discreta, ggplot2 muestra una advertencia.

# Map point size to class (a discrete variable, so ggplot2 warns about it).
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, size = class)
  )
## Warning: Using size for a discrete variable is not advised.

Transparencia de los puntos

Para alterar la transparencia usamos el parámetro alpha y nuevamente le damos el valor class.

# Map point transparency to class (discrete, so ggplot2 warns about it).
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, alpha = class)
  )
## Warning: Using alpha for a discrete variable is not advised.

Forma de los puntos

Utilizamos el parámetro shape para alterar la forma de los puntos u observaciones. ggplot2 solo asigna automáticamente 6 formas a la vez, por lo que la séptima clase (suv) se queda sin forma y sus puntos no aparecen en el gráfico.

# Map point shape to class; class has 7 levels but only 6 shapes are assigned.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, shape = class)
  )
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).

Elección manual de estéticas

Podemos alterar manualmente el color de los puntos usando valores en inglés de algunos colores, por ejemplo, red.

# A fixed colour goes outside aes(): every point is drawn in red.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy),
    color = "red"
  )

Resumiendo los últimos tres parámetros

color = nombre del color en formato string.

size = tamaño del punto en mm.

shape = forma del punto, con números del 0 al 25. Del 0 al 14 son formas huecas y, por tanto, solo se les puede cambiar el color. Del 15 al 20 son formas rellenas de color y, por tanto, también se les puede cambiar el color. Del 21 al 25 son formas con borde y relleno, y se les puede cambiar tanto el color (borde) como el fill (relleno).

# Taken from the web: reference chart of the point shapes numbered 0 to 25.
d <- data.frame(p = 0:25)
ggplot() +
  scale_y_continuous(name = "") +
  scale_x_continuous(name = "") +
  # Use the numeric value of p directly as the shape code.
  scale_shape_identity() +
  # Lay the shapes out on a grid: column p %% 16, row p %/% 16.
  geom_point(
    data = d,
    mapping = aes(x = p %% 16, y = p %/% 16, shape = p),
    size = 5,
    fill = "yellow"
  ) +
  # Print each shape's number slightly above the shape itself.
  geom_text(
    data = d,
    mapping = aes(x = p %% 16, y = p %/% 16 + 0.25, label = p),
    size = 3
  )

Algunos ejemplos usando lo que ya conocemos hasta ahora y un poco más

# Large diamond-shaped points (shape 23) with a red border and yellow fill.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy),
    shape = 23,
    size = 10,
    color = "red",
    fill = "yellow"
  )

# Colour by a logical expression: whether displ is below 5.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, color = displ < 5)
  )

FACETS

# facet_wrap(~<FORMULA_VARIABLE>): the variable should be discrete.
# One panel per car class, arranged in 3 rows.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy)
  ) +
  facet_wrap(~class, nrow = 3)

# Faceting by displ instead: one panel per distinct displacement value.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy)
  ) +
  facet_wrap(~displ, nrow = 3)

facet_grid(~)

# Grid of panels: drv on the rows, cyl on the columns.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy)
  ) +
  facet_grid(drv ~ cyl)

# Columns only: one panel per value of cyl.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy)
  ) +
  facet_grid(. ~ cyl)

# Rows only: one panel per value of drv.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy)
  ) +
  facet_grid(drv ~ .)

Diferentes geometrías

# Point geometry.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy)
  )

# Smoothed-line geometry on the same variables.
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy)
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# One smoothed line per drive train, distinguished by line type.
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy, linetype = drv)
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points and smoothed lines together, both coloured by drive train.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, color = drv)
  ) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy, linetype = drv, color = drv)
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# A single smoothed line for the whole dataset.
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy)
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# One smoothed line per drive train, coloured by drv, with the legend shown.
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy, group = drv, color = drv),
    show.legend = TRUE
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# x and y are mapped once in ggplot() and inherited by both layers;
# each layer then adds its own aesthetic (shape for points, colour for lines).
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_point(mapping = aes(shape = class)) +
  geom_smooth(mapping = aes(color = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).

# Points for every car, coloured by class; the smoother is fitted only on
# the SUV rows because that layer overrides the data.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  # se = FALSE hides the confidence band. FALSE is spelled out because F is
  # an ordinary variable that can be reassigned.
  geom_smooth(data = filter(mpg, class == "suv"), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Colour is mapped globally, so points and smoothed lines are both split
# and coloured by drive train. se = FALSE hides the confidence band.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Ejemplo 1

# Version A: the mapping is declared once in ggplot() and inherited.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Version B: the same mapping repeated inside each layer.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy)
  ) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy)
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Ejemplo 2

# Points plus a single smoothed line without the confidence band.
# FALSE is spelled out: F is an ordinary variable that can be reassigned.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Ejemplo 3

# One smoothed line per drive train (grouped, not coloured), no confidence
# band. FALSE is spelled out: F is an ordinary variable that can be reassigned.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(mapping = aes(group = drv), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Ejemplo 4

# Colour mapped globally to drv: points and lines share the colour split.
# FALSE is spelled out: F is an ordinary variable that can be reassigned.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, col = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Ejemplo 5

# Colour and shape are mapped only on the point layer, so the smoother
# stays a single line over all the data, with no confidence band.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(col = drv, shape = drv)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Ejemplo 6

# Points coloured and shaped by drv; one smoothed line per drv,
# distinguished by line type, with no confidence band.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(col = drv, shape = drv)) +
  geom_smooth(mapping = aes(linetype = drv), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Ejemplo 7

# Shape 23 has both border and fill: fill is mapped to drv, while the
# border is fixed to white with a stroke width of 2.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_point(
    mapping = aes(fill = drv),
    size = 4,
    shape = 23,
    col = "white",
    stroke = 2
  )

Ejemplo del dataset de diamantes

Ahora utilizaremos un dataset diferente.

Prices of over 50,000 round cut diamonds

Description

A dataset containing the prices and other attributes of almost 54,000 diamonds. The variables are as follows:

Usage

diamonds

Format

A data frame with 53940 rows and 10 variables:

price: price in US dollars ($326–$18,823)

carat: weight of the diamond (0.2–5.01)

cut: quality of the cut (Fair, Good, Very Good, Premium, Ideal)

color: diamond colour, from D (best) to J (worst)

clarity: a measurement of how clear the diamond is (I1 (worst), SI2, SI1, VS2, VS1, VVS2, VVS1, IF (best))

x: length in mm (0–10.74)

y: width in mm (0–58.9)

z: depth in mm (0–31.8)

depth: total depth percentage = z / mean(x, y) = 2 * z / (x + y) (43–79)

table: width of top of diamond relative to widest point (43–95)

# Preview the first rows of the diamonds dataset.
head(diamonds)
## # A tibble: 6 x 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
# Bar chart of the number of diamonds per cut.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut)
  )

# The same chart built from the stat instead of the geom.
ggplot(data = diamonds) +
  stat_count(
    mapping = aes(x = cut)
  )

Generamos un objeto tribble con las frecuencias ya calculadas para comprenderlo un poco mejor.

# Pre-computed frequency table: one row per cut with its count.
demo_diamonds <- tribble(
  ~cut, ~freqs,
  "Fair", 1610,
  "Good", 4906,
  "Very Good", 12082,
  "Premium", 13791,
  "Ideal", 21551
)

# stat = "identity" draws the bar heights straight from the freqs column
# instead of counting rows.
ggplot(data = demo_diamonds) +
  geom_bar(
    mapping = aes(x = cut, y = freqs),
    stat = "identity"
  )

# Bar chart of proportions instead of counts. after_stat(prop) reads the
# `prop` variable computed by the stat (replacing the older ..prop.. form).
# group = 1 makes the proportions relative to the whole dataset rather
# than within each cut.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))

# Summarise depth for each cut: a point at the median with a range from
# the minimum to the maximum. Uses the current argument names
# (fun / fun.min / fun.max) in place of the deprecated
# fun.y / fun.ymin / fun.ymax.
ggplot(data = diamonds) +
  stat_summary(
    mapping = aes(x = cut, y = depth),
    fun.min = min,
    fun.max = max,
    fun = median
  )

Colores y formas de los gráficos

# colour affects only the outline of each bar.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, colour = cut)
  )

# fill affects the interior of each bar.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = cut)
  )

# Filling by a second variable stacks the bars by clarity.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = clarity)
  )

# The same idea, stacking by diamond colour.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = color)
  )

Ahora usando position = “identity”

# position = "identity" draws every bar from zero, so they overlap;
# a low alpha makes the overlap visible.
ggplot(
  data = diamonds,
  mapping = aes(x = cut, fill = clarity)
) +
  geom_bar(alpha = 0.2, position = "identity")

# Same overlap, shown with outlines only (no fill).
ggplot(
  data = diamonds,
  mapping = aes(x = cut, colour = clarity)
) +
  geom_bar(fill = NA, position = "identity")

Ahora usando position = “fill”

# position = "fill" stacks the bars and scales each one to the same height.
ggplot(
  data = diamonds,
  mapping = aes(x = cut, fill = clarity)
) +
  geom_bar(position = "fill")

position = “dodge”

# position = "dodge" places the clarity bars side by side within each cut.
ggplot(
  data = diamonds,
  mapping = aes(x = cut, fill = clarity)
) +
  geom_bar(position = "dodge")

Volvemos al scatterplot usando position = “jitter”

# position = "jitter" adds a little random noise to separate
# overlapping points.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_point(position = "jitter")

# geom_jitter() is the shorthand for the layer above.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_jitter()

Sistemas de Coordenadas

# coord_flip() swaps the roles of x and y.
# Box plot of highway mileage per class, default orientation.
ggplot(
  data = mpg,
  mapping = aes(x = class, y = hwy)
) +
  geom_boxplot()

# The same box plot with the axes flipped.
ggplot(
  data = mpg,
  mapping = aes(x = class, y = hwy)
) +
  geom_boxplot() +
  coord_flip()

Ahora coord_quickmap() -> configura el aspect ratio para mapas

# Outline of the USA as a data frame of polygon vertices.
usa <- map_data("usa")

# Draw the polygons; coord_quickmap() sets the aspect ratio for maps.
ggplot(data = usa, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "blue", color = "white") +
  coord_quickmap()

# The same recipe for Italy.
italy <- map_data("italy")

ggplot(data = italy, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "blue", color = "white") +
  coord_quickmap()

coord_polar()

# Bar chart of cut drawn in polar coordinates. width = 1 removes the gaps
# between bars and the legend is hidden because the fill repeats the x axis.
# FALSE is spelled out: F is an ordinary variable that can be reassigned.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = cut),
    show.legend = FALSE,
    width = 1
  ) +
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL) +
  coord_polar()

# City vs highway mileage with a reference line (geom_abline() defaults)
# and a fixed aspect ratio between the two axes.
ggplot(
  data = mpg,
  mapping = aes(x = cty, y = hwy)
) +
  geom_point() +
  geom_abline() +
  coord_fixed()

# Stacked bar chart (cut filled by diamond colour) in polar coordinates.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = color)
  ) +
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL) +
  coord_polar()

Gramática por capas de ggplot2 (más general)

#ggplot(data = <DATA_FRAME>) +
#  <GEOM_FUNCTION>(
#                  mapping = aes(<MAPPINGS>),
#                  stat = <STAT>,
#                  position = <POSITION>
#                 ) + 
#   <COORDINATE_FUNCTION>() + 
#   <FACET_FUNCTION>()

# Final example combining geom, stat, coordinate system, facets and labels:
# clarity counts in polar coordinates, one panel per cut.
# after_stat(count) reads the count computed by the stat (replacing the
# older ..count.. form).
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = clarity, fill = clarity, y = after_stat(count))
  ) +
  coord_polar() +
  facet_wrap(~cut) +
  labs(
    x = NULL,
    y = NULL,
    title = "Ejemplo final de ggplot",
    caption = "Dos variables cruzadas de diamonds",
    subtitle = "Aprender ggplot puede ser divertido"
  )