Cargar paquete

library(readr)

## Warning: package 'readr' was built under R version 4.0.4

Cargar csv

read_csv("data/gapminder_comas.csv")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   country = col_character(),
##   continent = col_character(),
##   year = col_double(),
##   lifeExp = col_double(),
##   pop = col_double(),
##   gdpPercap = col_double()
## )

## # A tibble: 1,704 x 6
##    country     continent  year lifeExp      pop gdpPercap
##    <chr>       <chr>     <dbl>   <dbl>    <dbl>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # ... with 1,694 more rows

Asignar nombre a datos

datos_obtenidos_csv <- read_csv("data/gapminder_comas.csv")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   country = col_character(),
##   continent = col_character(),
##   year = col_double(),
##   lifeExp = col_double(),
##   pop = col_double(),
##   gdpPercap = col_double()
## )

Leer con delimitador arbitrario: michi (#)

read_delim("data/gapminder_michi.txt", delim = "#")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   country = col_character(),
##   continent = col_character(),
##   year = col_double(),
##   lifeExp = col_double(),
##   pop = col_double(),
##   gdpPercap = col_double()
## )

## # A tibble: 1,704 x 6
##    country     continent  year lifeExp      pop gdpPercap
##    <chr>       <chr>     <dbl>   <dbl>    <dbl>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # ... with 1,694 more rows

Asignar nombre

datos_obtenidos_michi_txt <- read_delim("data/gapminder_michi.txt", delim = "#")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   country = col_character(),
##   continent = col_character(),
##   year = col_double(),
##   lifeExp = col_double(),
##   pop = col_double(),
##   gdpPercap = col_double()
## )

Leer con delimitador arbitrario: slash (/)

read_delim("data/gapminder_slash.txt", delim = "/")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   country = col_character(),
##   continent = col_character(),
##   year = col_double(),
##   lifeExp = col_double(),
##   pop = col_double(),
##   gdpPercap = col_double()
## )

## # A tibble: 1,704 x 6
##    country     continent  year lifeExp      pop gdpPercap
##    <chr>       <chr>     <dbl>   <dbl>    <dbl>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # ... with 1,694 more rows

Asigno nombre

datos_obtenidos_txt <- (read_delim("data/gapminder_slash.txt", delim = "/"))

## 
## -- Column specification --------------------------------------------------------
## cols(
##   country = col_character(),
##   continent = col_character(),
##   year = col_double(),
##   lifeExp = col_double(),
##   pop = col_double(),
##   gdpPercap = col_double()
## )

Leer con delimitador arbitrario: guiones (-)

read_delim("data/gapminder_guiones.txt", delim = "-")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   country = col_character(),
##   continent = col_character(),
##   year = col_double(),
##   lifeExp = col_double(),
##   pop = col_double(),
##   gdpPercap = col_double()
## )

## # A tibble: 1,704 x 6
##    country     continent  year lifeExp      pop gdpPercap
##    <chr>       <chr>     <dbl>   <dbl>    <dbl>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # ... with 1,694 more rows

Asigno nombre

datos_obtenidos_guiones_txt <- read_delim("data/gapminder_guiones.txt", delim = "-")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   country = col_character(),
##   continent = col_character(),
##   year = col_double(),
##   lifeExp = col_double(),
##   pop = col_double(),
##   gdpPercap = col_double()
## )

Leer con delimitador punto y coma (;)

read_delim("data/gapminder_comas2.txt", delim = ";")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   country = col_character(),
##   continent = col_character(),
##   year = col_double(),
##   lifeExp = col_double(),
##   pop = col_double(),
##   gdpPercap = col_double()
## )

## # A tibble: 1,704 x 6
##    country     continent  year lifeExp      pop gdpPercap
##    <chr>       <chr>     <dbl>   <dbl>    <dbl>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # ... with 1,694 more rows

Asigno nombre

datos_obtenidos_puntoycoma_txt <- read_delim("data/gapminder_comas2.txt", delim = ";")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   country = col_character(),
##   continent = col_character(),
##   year = col_double(),
##   lifeExp = col_double(),
##   pop = col_double(),
##   gdpPercap = col_double()
## )

Leer excel

library(readxl)

## Warning: package 'readxl' was built under R version 4.0.4

read_xlsx("data/gapminder_excel.xlsx")

## # A tibble: 1,704 x 6
##    country     continent  year lifeExp      pop gdpPercap
##    <chr>       <chr>     <dbl>   <dbl>    <dbl>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # ... with 1,694 more rows

Asigno nombre

gapminder_excel <- read_xlsx("data/gapminder_excel.xlsx")

Manera alternativa usando read_excel()

read_excel("data/gapminder_excel.xlsx")

## # A tibble: 1,704 x 6
##    country     continent  year lifeExp      pop gdpPercap
##    <chr>       <chr>     <dbl>   <dbl>    <dbl>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # ... with 1,694 more rows

Asigno nombre

gapminder_excel_alter <- read_excel("data/gapminder_excel.xlsx")

Leer SPSS

library(haven)

## Warning: package 'haven' was built under R version 4.0.4

read_sav("data/09_UNIVERSIDADES_CARATULA.SAV")

## # A tibble: 122 x 8
##    SELECT UC0DD_CD UC0DD_DPTO UC0PP_CD UC0PP_PROV UC0DI_CD UC0DI_DIST   UC0P_OBS
##    <chr>  <chr>    <chr>      <chr>    <chr>      <chr>    <chr>        <chr>   
##  1 U0001  16       LORETO     01       MAYNAS     13       SAN JUAN BA~ ""      
##  2 U0002  14       LAMBAYEQUE 03       LAMBAYEQUE 01       LAMBAYEQUE   ""      
##  3 U0003  15       LIMA       01       LIMA       40       SANTIAGO DE~ ""      
##  4 U0004  15       LIMA       01       LIMA       13       JESUS MARIA  ""      
##  5 U0005  15       LIMA       01       LIMA       35       SAN MARTIN ~ ""      
##  6 U0006  15       LIMA       01       LIMA       40       SANTIAGO DE~ ""      
##  7 U0007  15       LIMA       01       LIMA       21       PUEBLO LIBRE ""      
##  8 U0008  23       TACNA      01       TACNA      01       TACNA        ""      
##  9 U0009  10       HUANUCO    01       HUANUCO    01       HUANUCO      ""      
## 10 U0010  15       LIMA       01       LIMA       13       JESUS MARIA  ""      
## # ... with 112 more rows

datos_spss <- read_sav("data/09_UNIVERSIDADES_CARATULA.SAV")

Forma alternativa

datos_spss2 <- read_spss("data/09_UNIVERSIDADES_CARATULA.SAV")

Ejemplo con skip de filas (se omiten las primeras filas de la hoja)

gap_excel_skip <- read_xlsx("data/gapminder_excel_skip.xlsx", skip = 3)

Ejemplo con seleccion de hojas

sheet_excel <- read_xlsx("data/gapminder_excel_sheet.xlsx", sheet = 2)

Con seleccion de hojas y skip

sheet_skip_excel <- read_xlsx("data/gapminder_excel_sheet.xlsx", sheet = 2, skip= 4)

Nombres de la columna

excel_col_names <- read_xlsx("data/gapminder_excel_col_names.xlsx", col_names = FALSE)

## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...

Asignamos nombres a columnas

excel_col_names_full <- read_xlsx("data/gapminder_excel_col_names.xlsx", col_names = c("country", "continent", "year", "lifeExp", "pop", "gdpPercap"))

Leer datos de paquete

# install.packages("vcdExtra") solo en la consola (se instala una vez)

library(vcdExtra)

## Warning: package 'vcdExtra' was built under R version 4.0.5

## Loading required package: vcd

## Warning: package 'vcd' was built under R version 4.0.5

## Loading required package: grid

## Loading required package: gnm

## Warning: package 'gnm' was built under R version 4.0.5

datasets("ggplot2")

## Loading package: ggplot2

##              Item      class      dim
## 1        diamonds data.frame 53940x10
## 2       economics data.frame    574x6
## 3  economics_long data.frame   2870x4
## 4       faithfuld data.frame   5625x3
## 5     luv_colours data.frame    657x4
## 6         midwest data.frame   437x28
## 7             mpg data.frame   234x11
## 8          msleep data.frame    83x11
## 9    presidential data.frame     11x4
## 10          seals data.frame   1155x4
## 11      txhousing data.frame   8602x9
##                                                                Title
## 1                           Prices of over 50,000 round cut diamonds
## 2                                            US economic time series
## 3                                            US economic time series
## 4                           2d density estimate of Old Faithful data
## 5                                            'colors()' in Luv space
## 6                                               Midwest demographics
## 7  Fuel economy data from 1999 to 2008 for 38 popular models of cars
## 8       An updated and expanded version of the mammals sleep dataset
## 9                    Terms of 11 presidents from Eisenhower to Obama
## 10                                    Vector field of seal movements
## 11                                               Housing sales in TX

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.5

diamonds

## # A tibble: 53,940 x 10
##    carat cut       color clarity depth table price     x     y     z
##    <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
##  1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
##  2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
##  3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
##  4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
##  5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
##  6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
##  7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47
##  8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53
##  9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49
## 10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39
## # ... with 53,930 more rows

importacion

Felipe Bedoya

25/5/2021

Cargar paquete

Cargar csv

Asignar nombre a datos

Leer con delimitador arbitrario: michi (#)

Asignar nombre

Leer con delimitador arbitrario: slash (/)

Asigno nombre

Leer con delimitador arbitrario: guiones (-)

Asigno nombre

Leer con delimitador punto y coma (;)

Asigno nombre

Leer excel

Asigno nombre

Manera alternativa usando read_excel()

Asigno nombre

Leer SPSS

Forma alternativa

Ejemplo con skip de filas (se omiten las primeras filas de la hoja)

Ejemplo con seleccion de hojas

Con seleccion de hojas y skip

Nombres de la columna

Asignamos nombres a columnas

Leer datos de paquete