Análisis exploratorio de datos

library(ggplot2)
library(dplyr)
library(titanic)

`bind_rows()` junta tablas por filas; `glimpse()` ofrece una foto global de los datos.

# Stack the training and test passenger tables row-wise into one data frame.
# Both tables must be arguments of the same bind_rows() call; previously the
# test table sat on its own line, so it was only printed and never bound.
# NOTE(review): bind_rows() fills columns missing from one input with NA
# (presumably `Survived` for the test rows) -- confirm this is acceptable
# for the summaries and plots that use `df`.
df <- dplyr::bind_rows(
  titanic::titanic_train,
  titanic::titanic_test
)

# Compact overview of every column: name, type and first values.
glimpse(df)
Observations: 891
Variables: 12
$ PassengerId <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,...
$ Survived    <int> 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0,...
$ Pclass      <int> 3, 1, 3, 1, 3, 3, 1, 3, 3, 2, 3, 1, 3, 3, 3, 2, 3, 2, 3, 3, 2, 2, 3, 1, 3,...
$ Name        <chr> "Braund, Mr. Owen Harris", "Cumings, Mrs. John Bradley (Florence Briggs Th...
$ Sex         <chr> "male", "female", "female", "female", "male", "male", "male", "male", "fem...
$ Age         <dbl> 22, 38, 26, 35, 35, NA, 54, 2, 27, 14, 4, 58, 20, 39, 14, 55, 2, NA, 31, N...
$ SibSp       <int> 1, 1, 0, 1, 0, 0, 0, 3, 0, 1, 1, 0, 0, 1, 0, 0, 4, 0, 1, 0, 0, 0, 0, 0, 3,...
$ Parch       <int> 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 1, 0, 0, 5, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,...
$ Ticket      <chr> "A/5 21171", "PC 17599", "STON/O2. 3101282", "113803", "373450", "330877",...
$ Fare        <dbl> 7.2500, 71.2833, 7.9250, 53.1000, 8.0500, 8.4583, 51.8625, 21.0750, 11.133...
$ Cabin       <chr> "", "C85", "", "C123", "", "", "E46", "", "", "", "G6", "C103", "", "", ""...
$ Embarked    <chr> "S", "C", "S", "S", "S", "Q", "S", "S", "S", "C", "S", "S", "S", "S", "S",...
# Bar chart of a categorical variable: one bar per value of `Sex`, with the
# bar height given by the number of rows. The title typo ("viariación") is
# corrected to "variación".
ggplot(data = df) +
  geom_bar(mapping = aes(x = Sex), fill = "hotpink", colour = "white") +
  theme_bw() +
  ggtitle("Para obtener la variación de una variable categórica: geom_bar")

# Number of rows for each value of `Sex` (the table behind the bar chart).
df %>%
  count(Sex)

Control + Shift + M = %>%

# Keep only the `Fare` column of the passenger table.
select(df, Fare)
# One-dimensional strip of `Fare`: every row is drawn as a point on the
# horizontal line y = 0.
df %>%
  ggplot() +
  geom_point(aes(x = Fare, y = 0), colour = "hotpink") +
  theme_bw()

NA
# Number of rows for each distinct `Fare` value; the count column is `n`.
df %>%
  count(Fare)
# Same strip as the plain point plot, but drawn from the per-fare counts so
# that point size encodes how many rows share each `Fare` value.
ggplot(data = count(df, Fare)) +
  geom_point(
    mapping = aes(x = Fare, y = 0, size = n),
    shape = 21,          # filled circle: `fill` is the inside, `colour` the border
    fill = "hotpink",
    colour = "purple",
    alpha = 0.2          # transparency so overlapping points stay visible
  ) +
  theme_bw()

# Show each fare next to the width-10 interval that cut_width() assigns it
# to, i.e. the binning the histogram with binwidth = 10 relies on.
mutate(
  select(df, Fare),
  intervalos = cut_width(Fare, width = 10)
)
# Histogram of a continuous variable: `Fare` grouped into bins of width 10.
df %>%
  ggplot() +
  geom_histogram(
    aes(x = Fare),
    binwidth = 10,
    fill = "hotpink",
    colour = "mediumblue"
  ) +
  labs(title = "Para observar la variación de una variable continua: geom_histogram") +
  theme_bw()

# Histogram of `Fare` filled by `Sex`, split into one panel per facet level.
# The original call was `facet_wrap(~)`, which is a syntax error because the
# formula has no right-hand side.
# NOTE(review): the intended facet variable was not recorded; `Sex` is
# assumed here because it is the only grouping column this chunk already
# uses -- confirm (e.g. `~ Pclass` may have been intended).
ggplot(data = df) +
  geom_histogram(
    mapping = aes(x = Fare, fill = Sex),
    binwidth = 10,       # explicit width, matching the other Fare plots
    colour = "black"
  ) +
  scale_fill_viridis_d() +
  facet_wrap(~ Sex)
# Frequency polygons to compare distributions: one line per passenger class,
# counting `Fare` in bins of width 10.
df %>%
  ggplot() +
  geom_freqpoly(
    aes(x = Fare, colour = factor(Pclass)),
    size = 0.8,
    binwidth = 10
  ) +
  labs(title = "Para comparar distintas distribuciones: geom_freqpoly") +
  theme_bw()

# Number of rows in each passenger class.
count(df, Pclass)
LS0tDQp0aXRsZTogIlRpZHkgQW50b25pYSBQdW1hcmlubyINCm91dHB1dDogDQogIGh0bWxfbm90ZWJvb2s6IA0KICAgIGhpZ2hsaWdodDogdGFuZ28NCiAgICB0aGVtZTogbHVtZW4NCi0tLQ0KDQojI0Fuw6FsaXNpcyBleHBsb3JhdG9yaW8gZGUgZGF0b3MNCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeSh0aXRhbmljKQ0KYGBgDQoNCkJpbmRfcm93cyBqdW50YXIgdGFibGFzIHBvciBmaWxhDQpnbGltcHNlIGVzIHVuYSBmb3RvIGdsb2JhbCBkZSBsb3MgZGF0b3MNCg0KYGBge3J9DQpkZiA8LSBkcGx5cjo6YmluZF9yb3dzKHRpdGFuaWM6OnRpdGFuaWNfdHJhaW4pDQoodGl0YW5pYzo6dGl0YW5pY190ZXN0KQ0KDQpnbGltcHNlKGRmKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCmdncGxvdChkYXRhID0gZGYpICsNCmdlb21fYmFyKG1hcHBpbmcgPSBhZXMoeCA9IFNleCksIGZpbGwgPSAiaG90cGluayIsIGNvbG91ciA9ICJ3aGl0ZSIgKSArDQoNCiAgdGhlbWVfYncoKSArDQogIGdndGl0bGUoIlBhcmEgb2J0ZW5lciBsYSB2aWFyaWFjacOzbiBkZSB1bmEgdmFyaWFibGUgY2F0ZWfDs3JpY2E6IGdlb21fYmFyIikNCg0KYGBgDQoNCmBgYHtyfQ0KY291bnQoZGYsIFNleCkNCmBgYA0KDQpDb250cm9sIHNoaXQgbSA9ICU+JSANCg0KYGBge3J9DQpkZiAlPiUgDQogIHNlbGVjdChGYXJlKQ0KYGBgDQoNCg0KYGBge3J9DQpnZ3Bsb3QoZGF0YSA9IGRmKSArIA0KICBnZW9tX3BvaW50KG1hcHBpbmcgPSBhZXMoeCA9IEZhcmUsIHkgPSAwKSwgY29sb3IgPSAiaG90cGluayIpICsNCiAgdGhlbWVfYncoKQ0KICANCmBgYA0KDQoNCg0KDQpgYGB7cn0NCmNvdW50KGRmLCBGYXJlKQ0KYGBgDQoNCg0KYGBge3J9DQpkZiAlPiUgDQogIGNvdW50KEZhcmUpICU+JSANCiAgZ2dwbG90KCkgKw0KICBnZW9tX3BvaW50KG1hcHBpbmcgPSBhZXMoeCA9IEZhcmUsIHkgPSAwLCBzaXplID0gbiksDQogICAgICAgICAgICAgYWxwaGEgPSAxIC8gNSwNCiAgICAgICAgICAgICBzaGFwZSA9IDIxLA0KICAgICAgICAgICAgIGZpbGwgPSAiaG90cGluayIsDQogICAgICAgICAgICAgY29sb3VyID0gInB1cnBsZSIpICsNCiAgdGhlbWVfYncoKQ0KYGBgDQoNCg0KYGBge3J9DQpkZiAlPiUNCiAgc2VsZWN0KEZhcmUpICU+JSANCiAgbXV0YXRlKGludGVydmFsb3MgPSBjdXRfd2lkdGgoRmFyZSwgMTApKQ0KYGBgDQoNCg0KDQoNCmBgYHtyfQ0KZ2dwbG90KGRhdGEgPSBkZikgKyANCiAgZ2VvbV9oaXN0b2dyYW0obWFwcGluZyA9IGFlcyAoeCA9IEZhcmUpLCBiaW53aWR0aCA9IDEwLCBmaWxsID0gImhvdHBpbmsiLCBjb2xvdXIgPSAibWVkaXVtYmx1ZSIpICsgDQogIHRoZW1lX2J3KCkgKw0KICBnZ3RpdGxlKCJQYXJhIG9ic2VydmFyIGxhIHZhcmlhY2nDs24gZGUgdW5hIHZhcmlhYmxlIGNvbnRpbnVhOiBnZW9tX2hpc3RvZ3JhbSIpDQpgYGANCg0KDQoNCmBgYHtyfQ0KZ2dwbG90KGRhdGEgPSBkZikgKw0KICBnZW9tX2hpc3Rv
Z3JhbShtYXBwaW5nID0gYWVzKHggPSBGYXJlLCBmaWxsID0gU2V4KSwNCiAgICAgICAgICAgICAgICAgY29sb3VyID0gImJsYWNrIikgKw0KICBzY2FsZV9maWxsX3ZpcmlkaXNfZCgpICsNCiAgZmFjZXRfd3JhcCh+KQ0KYGBgDQoNCg0KYGBge3J9DQpnZ3Bsb3QoZGF0YSA9IGRmKSArDQogIGdlb21fZnJlcXBvbHkobWFwcGluZyA9IGFlcyh4ID0gRmFyZSwgY29sb3VyID0gZmFjdG9yIChQY2xhc3MpKSwNCiAgICAgICAgICAgICAgICBiaW53aWR0aCA9IDEwLA0KICAgICAgICAgICAgICAgIHNpemUgPSAwLjgpICsNCiAgdGhlbWVfYncoKSArDQogIGdndGl0bGUgKCJQYXJhIGNvbXBhcmFyIGRpc3RpbnRhcyBkaXN0cmlidWNpb25lczogZ2VvbV9mcmVxcG9seSIpDQpgYGANCg0KDQoNCg0KYGBge3J9DQpkZiAlPiUgDQogIGNvdW50KFBjbGFzcykNCmBgYA0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg==