Cargar la librería para hacer unos gráficos
2 + 2 ???
library(ggplot2)
library(dplyr)
# Print a compact, column-by-column preview of the `mpg` data set.
dplyr::glimpse(mpg)
Observations: 234
Variables: 11
$ manufacturer <chr> "audi", "audi", "audi", "audi", "audi", "audi", "audi", "audi", "...
$ model <chr> "a4", "a4", "a4", "a4", "a4", "a4", "a4", "a4 quattro", "a4 quatt...
$ displ <dbl> 1.8, 1.8, 2.0, 2.0, 2.8, 2.8, 3.1, 1.8, 1.8, 2.0, 2.0, 2.8, 2.8, ...
$ year <int> 1999, 1999, 2008, 2008, 1999, 1999, 2008, 1999, 1999, 2008, 2008,...
$ cyl <int> 4, 4, 4, 4, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8,...
$ trans <chr> "auto(l5)", "manual(m5)", "manual(m6)", "auto(av)", "auto(l5)", "...
$ drv <chr> "f", "f", "f", "f", "f", "f", "f", "4", "4", "4", "4", "4", "4", ...
$ cty <int> 18, 21, 20, 21, 16, 18, 18, 18, 16, 20, 19, 15, 17, 17, 15, 15, 1...
$ hwy <int> 29, 29, 31, 30, 26, 26, 27, 26, 25, 28, 27, 25, 25, 25, 25, 24, 2...
$ fl <chr> "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", ...
$ class <chr> "compact", "compact", "compact", "compact", "compact", "compact",...
Apretando Control + Shift + Enter se ejecuta el código y se cargan los datos
Con el signo de pregunta antes de “mpg” (?mpg) aparece la descripción de las variables en el panel “Help”
Con la palabra “glimpse” se abren los datos con otra disposición. Entre paréntesis va “mpg”: glimpse(mpg)
“paquete” = app para RStudio
library(skimr)
# Print per-column summary statistics for `mpg`, grouped by variable type.
skimr::skim(mpg)
Skim summary statistics
n obs: 234
n variables: 11
-- Variable type:character -----------------------------------------------------
variable missing complete n min max empty n_unique
class 0 234 234 3 10 0 7
drv 0 234 234 1 1 0 3
fl 0 234 234 1 1 0 5
manufacturer 0 234 234 4 10 0 15
model 0 234 234 2 22 0 38
trans 0 234 234 8 10 0 10
-- Variable type:integer -------------------------------------------------------
variable missing complete n mean sd p0 p25 p50 p75 p100 hist
cty 0 234 234 16.86 4.26 9 14 17 19 35 ▅▇▇▇▁▁▁▁
cyl 0 234 234 5.89 1.61 4 4 6 8 8 ▇▁▁▇▁▁▁▇
hwy 0 234 234 23.44 5.95 12 18 24 27 44 ▃▇▃▇▅▁▁▁
year 0 234 234 2003.5 4.51 1999 1999 2003.5 2008 2008 ▇▁▁▁▁▁▁▇
-- Variable type:numeric -------------------------------------------------------
variable missing complete n mean sd p0 p25 p50 p75 p100 hist
displ 0 234 234 3.47 1.29 1.6 2.4 3.3 4.6 7 ▇▇▅▅▅▃▂▁
mpg = datos del paquete ggplot2
glimpse y skim son dos formas para conocer los datos
mapeo = cómo vincular los datos a propiedades visuales
nuestra data en la siguiente ocasión es “mpg”
para agregar propiedades visuales hay que agregar “mapping”
el nombre de la variable la sacamos de data rectangular mpg y lo obtenemos de skim o glimpse
eje x = horizontal; eje y = vertical
geom son las opciones de figuras para representar en el gráfico
en este caso puntos = “geom_point”
# Scatter plot of `hwy` against `displ` from `mpg`; point colour is mapped to
# `class` and point shape is mapped to `drv`.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, color = class, shape = drv)
) +
  geom_point()
Gráfico de dispersión = puntitos
Clase 2
library(ggplot2)
library(dplyr)
library(skimr)
algunos comandos útiles para explorar la data rectangular o dataframe o tibble
# Compact preview of `mpg`: one line per column with its type and first values.
mpg %>% glimpse()
Observations: 234
Variables: 11
$ manufacturer <chr> "audi", "audi", "audi", "audi", "audi", "audi", "audi", "audi", "...
$ model <chr> "a4", "a4", "a4", "a4", "a4", "a4", "a4", "a4 quattro", "a4 quatt...
$ displ <dbl> 1.8, 1.8, 2.0, 2.0, 2.8, 2.8, 3.1, 1.8, 1.8, 2.0, 2.0, 2.8, 2.8, ...
$ year <int> 1999, 1999, 2008, 2008, 1999, 1999, 2008, 1999, 1999, 2008, 2008,...
$ cyl <int> 4, 4, 4, 4, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8,...
$ trans <chr> "auto(l5)", "manual(m5)", "manual(m6)", "auto(av)", "auto(l5)", "...
$ drv <chr> "f", "f", "f", "f", "f", "f", "f", "4", "4", "4", "4", "4", "4", ...
$ cty <int> 18, 21, 20, 21, 16, 18, 18, 18, 16, 20, 19, 15, 17, 17, 15, 15, 1...
$ hwy <int> 29, 29, 31, 30, 26, 26, 27, 26, 25, 28, 27, 25, 25, 25, 25, 24, 2...
$ fl <chr> "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", ...
$ class <chr> "compact", "compact", "compact", "compact", "compact", "compact",...
# Summary statistics for every column of `mpg`, grouped by variable type.
skimr::skim(mpg)
Skim summary statistics
n obs: 234
n variables: 11
-- Variable type:character -----------------------------------------------------
variable missing complete n min max empty n_unique
class 0 234 234 3 10 0 7
drv 0 234 234 1 1 0 3
fl 0 234 234 1 1 0 5
manufacturer 0 234 234 4 10 0 15
model 0 234 234 2 22 0 38
trans 0 234 234 8 10 0 10
-- Variable type:integer -------------------------------------------------------
variable missing complete n mean sd p0 p25 p50 p75 p100 hist
cty 0 234 234 16.86 4.26 9 14 17 19 35 ▅▇▇▇▁▁▁▁
cyl 0 234 234 5.89 1.61 4 4 6 8 8 ▇▁▁▇▁▁▁▇
hwy 0 234 234 23.44 5.95 12 18 24 27 44 ▃▇▃▇▅▁▁▁
year 0 234 234 2003.5 4.51 1999 1999 2003.5 2008 2008 ▇▁▁▁▁▁▁▇
-- Variable type:numeric -------------------------------------------------------
variable missing complete n mean sd p0 p25 p50 p75 p100 hist
displ 0 234 234 3.47 1.29 1.6 2.4 3.3 4.6 7 ▇▇▅▅▅▃▂▁
# Scatter plot of `hwy` against `displ` from `mpg`, with point colour mapped to
# `class`. Here the data and the mapping are given to the layer instead of to
# ggplot(). No shape mapping is declared: a dangling, empty `shape = ` argument
# inside aes() was removed.
ggplot() +
  geom_point(
    data = mpg,
    mapping = aes(x = displ, y = hwy, color = class)
  )
En el gráfico anterior los puntos se ubicaron según el mapeo del código de arriba: x es la cilindrada del motor en litros (displ) e y es el rendimiento en carretera en millas por galón (hwy)
con “mpg” en la consola salen los valores
Poco a poco se pueden omitir los nombres de los argumentos; por ejemplo, en este caso nos saltamos “data =” y escribimos directamente “mpg”
cola de chancho (~) = Alt + 126
# Scatter plot of `hwy` against `displ`, coloured by `class`, drawn as one
# panel per value of `drv` (the formula `~ drv` names the faceting variable).
ggplot(mpg, aes(x = displ, y = hwy, color = class)) +
  geom_point() +
  facet_wrap(~ drv)
# Line plot of `hwy` against `displ` from `mpg`.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_line()
# Bar chart with one bar per `drv` value; only x is mapped, so geom_bar()
# uses its default stat to compute the bar heights.
ggplot(mpg, aes(x = drv)) +
  geom_bar()
# Hand-built summary table: one row per drive-train code (`tipo_traccion`)
# with its count (`num_obs`). Only `data_autos_resumida` is assigned; the
# previous chained assignment also created an unintended global named `data`.
# NOTE(review): the counts add up to 231, while glimpse(mpg) reports 234
# observations. Confirm the values, e.g. against count(mpg, drv).
data_autos_resumida <- tribble(
  ~tipo_traccion, ~num_obs,
  "4",            104,
  "f",            102,
  "r",            25
)

# Print the table to check it.
data_autos_resumida
# Bar chart of the pre-computed counts: stat = "identity" makes each bar's
# height come straight from `num_obs` instead of being counted by geom_bar().
ggplot(data_autos_resumida, aes(x = tipo_traccion, y = num_obs)) +
  geom_bar(stat = "identity")
# Scatter plot of `hwy` against `displ` with a smoothed trend line on top.
# x and y are declared once in ggplot() and inherited by both layers; colour
# is mapped to `class` in the point layer only, so the smooth layer is not
# split by class.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()
Ahora sacaremos el atributo smooth (la línea) y cambiaremos el color de los puntos en el segundo paréntesis
# Scatter plot of `hwy` against `displ` with every point drawn in green.
# `color = "green"` sits outside aes(), so it is a fixed value for the whole
# layer. The former `color = class` mapping was overridden by that constant
# and had no effect, so it has been dropped.
ggplot(mpg) +
  geom_point(mapping = aes(x = displ, y = hwy), color = "green")
Ahora agregaremos otra data agrandando el tamaño de un nuevo punto y cambiando color
# Plot every `mpg` observation in blue, then overlay one extra, larger red
# point taken from a separate one-row data frame (displ = 4, hwy = 40).
# Constant colours must be quoted strings: an unquoted `blue` is looked up as
# an R object and fails with "object 'blue' not found". The second layer is
# geom_point() (it had been misspelled `geom_ponit`).
ggplot(mpg) +
  geom_point(mapping = aes(x = displ, y = hwy), color = "blue") +
  geom_point(
    mapping = aes(x = displ, y = hwy),
    data = data.frame(displ = 4, hwy = 40),
    color = "red",
    size = 4
  )
actividad
library(dplyr)
# Keep only the `mtcars` rows whose `cyl` equals 6 and print them.
mtcars %>% filter(cyl == 6)
library(gapminder)
# Print the gapminder table, then store the rows whose `continent` is
# "Africa" in `Africa_continente`.
gapminder
Africa_continente <- dplyr::filter(gapminder, continent == "Africa")
# Compact preview of `gapminder`: one line per column with type and values.
gapminder %>% glimpse()
Observations: 1,704
Variables: 6
$ country <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afg...
$ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, As...
$ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, 2002, 20...
$ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.822, 41.6...
$ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12881816, ...
$ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, 978.0114...
# Scatter plot for the African subset: `lifeExp` against `gdpPercap`, with
# point size mapped to `pop` and point colour mapped to `year`.
ggplot(Africa_continente, aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(size = pop, color = year))