5/4/2018

Mutate & family

Mutate: Compute new column(s)

library(dplyr)
msleep <- read.csv("./data/msleep_ggplot2.csv")
msleep %>%
mutate(sleep_new=sleep_total + sleep_rem) %>%
head()
##                         name      genus  vore        order conservation
## 1                    Cheetah   Acinonyx carni    Carnivora           lc
## 2                 Owl monkey      Aotus  omni     Primates         <NA>
## 3            Mountain beaver Aplodontia herbi     Rodentia           nt
## 4 Greater short-tailed shrew    Blarina  omni Soricomorpha           lc
## 5                        Cow        Bos herbi Artiodactyla domesticated
## 6           Three-toed sloth   Bradypus herbi       Pilosa         <NA>
##   sleep_total sleep_rem sleep_cycle awake brainwt  bodywt sleep_new
## 1        12.1        NA          NA  11.9      NA  50.000        NA
## 2        17.0       1.8          NA   7.0 0.01550   0.480      18.8
## 3        14.4       2.4          NA   9.6      NA   1.350      16.8
## 4        14.9       2.3   0.1333333   9.1 0.00029   0.019      17.2
## 5         4.0       0.7   0.6666667  20.0 0.42300 600.000       4.7
## 6        14.4       2.2   0.7666667   9.6      NA   3.850      16.6

Transmute: Compute new column(s), drop others.

head(transmute(msleep,sleep_new=sleep_rem+sleep_total))
##   sleep_new
## 1        NA
## 2      18.8
## 3      16.8
## 4      17.2
## 5       4.7
## 6      16.6

Mutate_if: Apply funs to all columns of one type. Reemplaza los datos antiguos con los nuevos.

msleep %>% 
mutate_if(is.numeric, log2)%>% 
head()
##                         name      genus  vore        order conservation
## 1                    Cheetah   Acinonyx carni    Carnivora           lc
## 2                 Owl monkey      Aotus  omni     Primates         <NA>
## 3            Mountain beaver Aplodontia herbi     Rodentia           nt
## 4 Greater short-tailed shrew    Blarina  omni Soricomorpha           lc
## 5                        Cow        Bos herbi Artiodactyla domesticated
## 6           Three-toed sloth   Bradypus herbi       Pilosa         <NA>
##   sleep_total  sleep_rem sleep_cycle    awake    brainwt     bodywt
## 1    3.596935         NA          NA 3.572890         NA  5.6438562
## 2    4.087463  0.8479969          NA 2.807355  -6.011588 -1.0588937
## 3    3.847997  1.2630344          NA 3.263034         NA  0.4329594
## 4    3.897240  1.2016339  -2.9068906 3.185867 -11.751659 -5.7178568
## 5    2.000000 -0.5145732  -0.5849625 4.321928  -1.241270  9.2288187
## 6    3.847997  1.1375035  -0.3833286 3.263034         NA  1.9448584

Mutate_all: Apply funs to every column.

msleep %>% 
  select(sleep_total, sleep_rem) %>%
  mutate_all(funs(log(.),log2(.))) %>%  
  names()
## [1] "sleep_total"      "sleep_rem"        "sleep_total_log" 
## [4] "sleep_rem_log"    "sleep_total_log2" "sleep_rem_log2"

Mutate_at: Apply funs to specific columns.

msleep %>%
mutate_at(vars(sleep_total),funs(log(.),log2(.))) %>%
  names()
##  [1] "name"         "genus"        "vore"         "order"       
##  [5] "conservation" "sleep_total"  "sleep_rem"    "sleep_cycle" 
##  [9] "awake"        "brainwt"      "bodywt"       "log"         
## [13] "log2"

Joins: Especificar las columnas que usaremos para hacer el join

"x" está en los dos data frames

df1 <- data_frame(x = c(1, 2), y = 2:1)
df2 <- data_frame(x = c(1, 3), a = 10, b = "a")
df1
## # A tibble: 2 x 2
##       x     y
##   <dbl> <int>
## 1  1.00     2
## 2  2.00     1
df2
## # A tibble: 2 x 3
##       x     a b    
##   <dbl> <dbl> <chr>
## 1  1.00  10.0 a    
## 2  3.00  10.0 a

Left join: left_join(x, y)

Returns all rows from "x", and all columns from "x" and "y" for the matching values.

Por default, toma todas las columnas que tienen los mismos nombres en ambas matrices. En este caso, solamente la variable "x".

df1
## # A tibble: 2 x 2
##       x     y
##   <dbl> <int>
## 1  1.00     2
## 2  2.00     1
df2
## # A tibble: 2 x 3
##       x     a b    
##   <dbl> <dbl> <chr>
## 1  1.00  10.0 a    
## 2  3.00  10.0 a
left_join(df1, df2)
## Joining, by = "x"
## # A tibble: 2 x 4
##       x     y     a b    
##   <dbl> <int> <dbl> <chr>
## 1  1.00     2  10.0 a    
## 2  2.00     1  NA   <NA>

Right join: right_join (x,y)

Returns all rows from y, and all columns from x and y.

df1
## # A tibble: 2 x 2
##       x     y
##   <dbl> <int>
## 1  1.00     2
## 2  2.00     1
df2
## # A tibble: 2 x 3
##       x     a b    
##   <dbl> <dbl> <chr>
## 1  1.00  10.0 a    
## 2  3.00  10.0 a
right_join(df1, df2)
## Joining, by = "x"
## # A tibble: 2 x 4
##       x     y     a b    
##   <dbl> <int> <dbl> <chr>
## 1  1.00     2  10.0 a    
## 2  3.00    NA  10.0 a

¿Qué pasa con los casos límite? (no match, multiple matches)

No matches

df10 <- data_frame(x = c(1, 2), y = 2:1)
df11 <- data_frame(x = c(4, 3), a = 10, b = "a")
df10
## # A tibble: 2 x 2
##       x     y
##   <dbl> <int>
## 1  1.00     2
## 2  2.00     1
df11
## # A tibble: 2 x 3
##       x     a b    
##   <dbl> <dbl> <chr>
## 1  4.00  10.0 a    
## 2  3.00  10.0 a
left_join(df10, df11)
## Joining, by = "x"
## # A tibble: 2 x 4
##       x     y     a b    
##   <dbl> <int> <dbl> <chr>
## 1  1.00     2    NA <NA> 
## 2  2.00     1    NA <NA>

Multiple matches (duplica observaciones)

df13 <- data_frame(x = c(1, 2), y = 2:1)
df9 <- data_frame(x = c(1, 3,1), a = c(10,9,8), b = "a")
df13
## # A tibble: 2 x 2
##       x     y
##   <dbl> <int>
## 1  1.00     2
## 2  2.00     1
df9
## # A tibble: 3 x 3
##       x     a b    
##   <dbl> <dbl> <chr>
## 1  1.00 10.0  a    
## 2  3.00  9.00 a    
## 3  1.00  8.00 a
left_join(df13, df9)
## Joining, by = "x"
## # A tibble: 3 x 4
##       x     y     a b    
##   <dbl> <int> <dbl> <chr>
## 1  1.00     2 10.0  a    
## 2  1.00     2  8.00 a    
## 3  2.00     1 NA    <NA>

El orden no importa

El orden de las filas no importa, solo el nombre:

df3 <- data_frame(y = 2:1,x = c(1, 2))
df4 <- data_frame(a = 10, b = "a", x = c(1, 3))
df3
## # A tibble: 2 x 2
##       y     x
##   <int> <dbl>
## 1     2  1.00
## 2     1  2.00
df4
## # A tibble: 2 x 3
##       a b         x
##   <dbl> <chr> <dbl>
## 1  10.0 a      1.00
## 2  10.0 a      3.00
left_join(df3,df4)
## Joining, by = "x"
## # A tibble: 2 x 4
##       y     x     a b    
##   <int> <dbl> <dbl> <chr>
## 1     2  1.00  10.0 a    
## 2     1  2.00  NA   <NA>

¿Y si la variable a usar para el merge no tiene el mismo nombre?

Si los nombres de las filas que se quieren usar como referencia no coinciden, hay que usar "by"

df5 <- data_frame(z = c(1, 2), y = 2:1)
df4 <- data_frame(a = 10, b = "a", x = c(1, 3))
df5
## # A tibble: 2 x 2
##       z     y
##   <dbl> <int>
## 1  1.00     2
## 2  2.00     1
df4
## # A tibble: 2 x 3
##       a b         x
##   <dbl> <chr> <dbl>
## 1  10.0 a      1.00
## 2  10.0 a      3.00
left_join(df5,df4, by = c("z"="x")) #busca "z" en df5 y "x" en df4
## # A tibble: 2 x 4
##       z     y     a b    
##   <dbl> <int> <dbl> <chr>
## 1  1.00     2  10.0 a    
## 2  2.00     1  NA   <NA>

¿Y si hay más de una variable para hacer el merge?

En este caso, los match se hacen para aquellas observaciones que coincidan, a la vez, con los valores de la variable en la matriz original.

Por ejemplo, en la matriz "x" tenemos los pares (1,14) y (2,13). En la matrix "y" solamente existe el par (1,14), por lo que esa fila tendrá el match.

df6 <- data_frame(x = c(1, 2), y = 2:1, o=c(14,13))
df7 <- data_frame(a = 10, b = "a", x = c(1, 3), o=c(14,14))
df6
## # A tibble: 2 x 3
##       x     y     o
##   <dbl> <int> <dbl>
## 1  1.00     2  14.0
## 2  2.00     1  13.0
df7
## # A tibble: 2 x 4
##       a b         x     o
##   <dbl> <chr> <dbl> <dbl>
## 1  10.0 a      1.00  14.0
## 2  10.0 a      3.00  14.0
left_join(df6,df7)
## Joining, by = c("x", "o")
## # A tibble: 2 x 5
##       x     y     o     a b    
##   <dbl> <int> <dbl> <dbl> <chr>
## 1  1.00     2  14.0  10.0 a    
## 2  2.00     1  13.0  NA   <NA>

Inner join

Retains only rows with matches.

Es decir, hace matches como los anteriores pero solo retiene las filas donde hubo matches, las otras las descarta.

df1
## # A tibble: 2 x 2
##       x     y
##   <dbl> <int>
## 1  1.00     2
## 2  2.00     1
df2
## # A tibble: 2 x 3
##       x     a b    
##   <dbl> <dbl> <chr>
## 1  1.00  10.0 a    
## 2  3.00  10.0 a
inner_join(df1, df2)
## Joining, by = "x"
## # A tibble: 1 x 4
##       x     y     a b    
##   <dbl> <int> <dbl> <chr>
## 1  1.00     2  10.0 a

Full join

Retain all values, all rows.

Es decir, muestra las filas que tienen match pero también las que no tuvieron match.

En este caso, la fila 1 de la matrix "x" y la fila 1 de la matriz "y" hacen match, por lo que aparecen como una sola fila.

Luego, la fila 2 de la matrix "x" y la fila 2 de la matriz "y" aparecen también en la matriz final, aún cuando no había matches para ellas.

df1 <- data_frame(x = c(1, 2), y = 2:1)
df2 <- data_frame(x = c(4, 3), a = 10, b = "a")
df1
## # A tibble: 2 x 2
##       x     y
##   <dbl> <int>
## 1  1.00     2
## 2  2.00     1
df2
## # A tibble: 2 x 3
##       x     a b    
##   <dbl> <dbl> <chr>
## 1  4.00  10.0 a    
## 2  3.00  10.0 a
full_join(df1, df2)
## Joining, by = "x"
## # A tibble: 4 x 4
##       x     y     a b    
##   <dbl> <int> <dbl> <chr>
## 1  1.00     2  NA   <NA> 
## 2  2.00     1  NA   <NA> 
## 3  4.00    NA  10.0 a    
## 4  3.00    NA  10.0 a

Semi join

Returns all rows from "x" where there are matching values in "y", keeping just columns from "x".

A semi join differs from an inner join because an inner join will return one row of "x" for each matching row of "y", where a semi join will never duplicate rows of "x".

df8 <- data_frame(x = c(1, 2), y = 2:1)
df7 <- data_frame(a = 10, b = "a", x = c(1, 2))

df8
## # A tibble: 2 x 2
##       x     y
##   <dbl> <int>
## 1  1.00     2
## 2  2.00     1
df7
## # A tibble: 2 x 3
##       a b         x
##   <dbl> <chr> <dbl>
## 1  10.0 a      1.00
## 2  10.0 a      2.00
semi_join(df8, df7)
## Joining, by = "x"
## # A tibble: 2 x 2
##       x     y
##   <dbl> <int>
## 1  1.00     2
## 2  2.00     1

Anti join

Returns all rows from "x" where there are not matching values in "y", keeping just columns from "x".

df1
## # A tibble: 2 x 2
##       x     y
##   <dbl> <int>
## 1  1.00     2
## 2  2.00     1
df2
## # A tibble: 2 x 3
##       x     a b    
##   <dbl> <dbl> <chr>
## 1  4.00  10.0 a    
## 2  3.00  10.0 a
anti_join(df1, df2)
## Joining, by = "x"
## # A tibble: 2 x 2
##       x     y
##   <dbl> <int>
## 1  1.00     2
## 2  2.00     1

Ejercicio

Con datos del Zonas Francas: + agregar ciiu3 + agregar seccion

Datos

zzff.10 <- readstata13::read.dta13('~/bases/zzff/2010/zf_2010.dta')
zzff.10 %>% select(A1, G1) %>% head
##                               A1   G1
## 1                         UPM SA 1701
## 2       PRESSUR CORPORATION S.A. 1811
## 3                 TINGELSUR S.A. 1610
## 4 PEPSI COLA MFG. OF URUGUAY SRL 1079
## 5           Andritz Uruguay S.A. 3312
## 6            Kemira Uruguay S.A. 2011

CIIUS (1)

CIIUS (1)

library(stringr)
library(dplyr)
df <- abrir("data/ciius.txt") %>% 
      mutate(ciiu_3 = ciiu_3(lineas),
             ciiu_4 = ciiu_4(lineas),
             rubro  = rubro(lineas))

Pregunta

¿Qué hacemos cuando un CIIU4 se corresponde con muchos CIIU3 (iguales/distintos)?

df %>% 
  group_by(ciiu_4) %>% 
  summarize(ciiu3s=n_distinct(ciiu_3)) %>%
  filter(ciiu3s > 3)
## # A tibble: 3 x 2
##   ciiu_4 ciiu3s
##   <chr>   <int>
## 1 4390        4
## 2 5229        4
## 3 8620        4
df %>% 
  filter(ciiu_4 ==  4390)
## # A tibble: 5 x 4
##   lineas                          ciiu_3 ciiu… rubro                      
##   <chr>                           <chr>  <chr> <chr>                      
## 1 4390,0 Otras actividades espec… 4500   4390  " Otras actividades especi…
## 2 4390,3 Construcción de pilotaj… 4521   4390  " Construcción de pilotaje…
## 3 4390,1 Actividades de impermea… 4540   4390  " Actividades de impermeab…
## 4 4390,2 Armado y desarmado de e… 4540   4390  " Armado y desarmado de es…
## 5 4390,9 Otras actividades espec… 4550   4390  " Otras actividades especi…

Nos quedamos con el primero

tabla_nueva <- df %>% 
  group_by(ciiu_4) %>%
  summarize(ciiu_3 = first(ciiu_3))
tabla_nueva %>% 
  group_by(ciiu_4) %>%
  summarize(ciiu3s=n_distinct(ciiu_3)) %>%
  group_by(ciiu3s) %>%
  summarize(n=n())
## # A tibble: 1 x 2
##   ciiu3s     n
##    <int> <int>
## 1      1   427

Y hacemos el left_join tranquilos

names(zzff.10)
##    [1] "V1546"    "Ano"      "A1"       "A2"       "A3"       "A4"      
##    [7] "A5"       "A6"       "A7"       "A8"       "A9_0"     "A9_1"    
##   [13] "A9_2"     "A10"      "B1"       "B2"       "B3"       "B4"      
##   [19] "B5_1"     "B5_2"     "B5_3"     "B5_4"     "B5_5"     "B5_6"    
##   [25] "B6"       "B7"       "B8"       "B9"       "B10"      "B11"     
##   [31] "D1"       "D2"       "D3"       "D4"       "D5"       "E1"      
##   [37] "E2"       "E3"       "E4"       "E5"       "F1"       "F1_1"    
##   [43] "F2"       "F2_1"     "F3"       "F3_1"     "F4"       "F4_1"    
##   [49] "G1"       "G1_1"     "G1_2"     "G2"       "G2_1"     "G2_2"    
##   [55] "G3"       "G3_1"     "G3_2"     "G4"       "H1_1"     "H1_2"    
##   [61] "H1_2_A"   "H1_3"     "H1_4"     "H1_5"     "H1_6"     "H1_7"    
##   [67] "H1_8"     "H1_9"     "H1_10"    "H1_11"    "H1_12"    "H1_13"   
##   [73] "H1_14"    "H1_15"    "H1_16"    "H1_17"    "H1_18"    "H1_19"   
##   [79] "H1_20"    "H1_21"    "H2_1"     "H2_2"     "H2_3"     "H2_4"    
##   [85] "H2_5"     "H2_6"     "H2_7"     "H2_8"     "H2_9"     "H2_10"   
##   [91] "H2_11"    "H2_12"    "H2_13"    "H2_14"    "H2_15"    "H2_16"   
##   [97] "H2_17"    "H2_18"    "H2_19"    "H2_20"    "H2_21"    "H3_1"    
##  [103] "H3_2"     "H3_3"     "H3_4"     "H3_5"     "H3_6"     "H3_7"    
##  [109] "H3_8"     "H3_9"     "H3_10"    "H3_11"    "H3_12"    "H3_13"   
##  [115] "H3_14"    "H3_15"    "H3_16"    "H3_17"    "H3_18"    "H3_19"   
##  [121] "H3_20"    "H3_21"    "__2H1_1"  "__2H1_2"  "__2H1_3"  "__2H1_4" 
##  [127] "__2H1_5"  "__2H1_6"  "__2H1_7"  "__2H1_8"  "__2H1_9"  "__2H1_10"
##  [133] "__2H1_11" "__2H1_12" "__2H1_13" "__2H1_14" "__2H1_15" "__2H2_1" 
##  [139] "__2H2_2"  "__2H2_3"  "__2H2_4"  "__2H2_5"  "__2H2_6"  "__2H2_7" 
##  [145] "__2H2_8"  "__2H2_9"  "__2H2_10" "__2H2_11" "__2H2_12" "__2H2_13"
##  [151] "__2H2_14" "__2H2_15" "__2H3_1"  "__2H3_2"  "__2H3_3"  "__2H3_4" 
##  [157] "__2H3_5"  "__2H3_6"  "__2H3_7"  "__2H3_8"  "__2H3_9"  "__2H3_10"
##  [163] "__2H3_11" "__2H3_12" "__2H3_13" "__2H3_14" "__2H3_15" "__2H4_1" 
##  [169] "__2H4_2"  "__2H4_3"  "__2H4_4"  "__2H4_5"  "__2H4_6"  "__2H4_7" 
##  [175] "__2H4_8"  "__2H4_9"  "__2H4_10" "__2H4_11" "__2H4_12" "__2H4_13"
##  [181] "__2H4_14" "__2H4_15" "I1_1"     "I1_2"     "I1_3"     "I2_1"    
##  [187] "I2_2"     "I2_3"     "I3_1"     "I3_2"     "I3_3"     "I4_1"    
##  [193] "I4_2"     "I4_3"     "I5_1"     "I5_2"     "I5_3"     "I6_1"    
##  [199] "I6_2"     "I6_3"     "I7_1"     "I7_2"     "I7_3"     "J1_1"    
##  [205] "J1_2"     "J2_1"     "J2_2"     "J3_1"     "J3_2"     "J4_1"    
##  [211] "J4_2"     "J5_1"     "J5_2"     "J6_1"     "J6_2"     "J7_1"    
##  [217] "J7_2"     "K1_1"     "K1_2"     "K1_3"     "K1_4"     "K1_5"    
##  [223] "K1_6"     "K2_1"     "K2_2"     "K2_3"     "K2_4"     "K2_5"    
##  [229] "K2_6"     "K3_1"     "K3_2"     "K3_3"     "K3_4"     "K3_5"    
##  [235] "K3_6"     "K4_1"     "K4_2"     "K4_3"     "K4_4"     "K4_5"    
##  [241] "K4_6"     "K5_1"     "K5_2"     "K5_3"     "K5_4"     "K5_5"    
##  [247] "K5_6"     "K6_1"     "K6_2"     "K6_3"     "K6_4"     "K6_5"    
##  [253] "K6_6"     "K7_1"     "K7_2"     "K7_3"     "K7_4"     "K7_5"    
##  [259] "K7_6"     "K8_1"     "K8_2"     "K8_3"     "K8_4"     "K8_5"    
##  [265] "K8_6"     "K9_1"     "K9_2"     "K9_3"     "K9_4"     "K9_5"    
##  [271] "K9_6"     "L1"       "L2"       "L3"       "L4_1"     "L4_2"    
##  [277] "L5"       "L6"       "L7"       "L8"       "L9"       "L10"     
##  [283] "L10_1"    "L10_2"    "M1_1"     "M1_2"     "M1_3"     "M1_4"    
##  [289] "M1_5"     "M1_6"     "M2_1"     "M2_2"     "M2_3"     "M2_4"    
##  [295] "M2_5"     "M2_6"     "M3_1"     "M3_2"     "M3_3"     "M3_4"    
##  [301] "M3_5"     "M3_6"     "M4_1"     "M4_2"     "M4_3"     "M4_4"    
##  [307] "M4_5"     "M4_6"     "M5_1"     "M5_2"     "M5_3"     "M5_4"    
##  [313] "M5_5"     "M5_6"     "M6_1"     "M6_2"     "M6_3"     "M6_4"    
##  [319] "M6_5"     "M6_6"     "M7_1"     "M7_2"     "M7_3"     "M7_4"    
##  [325] "M7_5"     "M7_6"     "M8_1_A"   "M8_2"     "M8_3"     "M8_4"    
##  [331] "M8_5"     "M8_6"     "M9_1"     "M9_2"     "M9_3"     "M9_4"    
##  [337] "M9_5"     "M9_6"     "MP1"      "MP2"      "MP3"      "MP4"     
##  [343] "MP5"      "MP6"      "M10_1"    "M10_2"    "M10_3"    "M10_4"   
##  [349] "M10_5"    "M10_6"    "M11_1"    "M11_2"    "M11_3"    "M11_4"   
##  [355] "M11_5"    "M11_6"    "M12_1"    "M12_2"    "M12_3"    "M12_4"   
##  [361] "M12_5"    "M12_6"    "M8_1"     "M13_2"    "M13_3"    "M13_4"   
##  [367] "M13_5"    "M13_6"    "M14_1"    "M14_2"    "M14_3"    "M14_4"   
##  [373] "M14_5"    "M14_6"    "M15_1"    "M15_2"    "M15_3"    "M15_4"   
##  [379] "M15_5"    "M15_6"    "M16_1"    "M16_2"    "M16_3"    "M16_4"   
##  [385] "M16_5"    "M16_6"    "M17_1"    "M17_2"    "M17_3"    "M17_4"   
##  [391] "M17_5"    "M17_6"    "M18_1"    "M18_2"    "M18_3"    "M18_4"   
##  [397] "M18_5"    "M18_6"    "M19_1"    "M19_2"    "M19_3"    "M19_4"   
##  [403] "M19_5"    "M19_6"    "M20_1"    "M20_2"    "M20_3"    "M20_4"   
##  [409] "M20_5"    "M20_6"    "M21_1"    "M21_2"    "M21_3"    "M21_4"   
##  [415] "M21_5"    "M21_6"    "M22_1"    "M22_2"    "M22_3"    "M22_4"   
##  [421] "M22_5"    "M22_6"    "M23_1"    "M23_2"    "M23_3"    "M23_4"   
##  [427] "M23_5"    "M23_6"    "M24_1"    "M24_2"    "M24_3"    "M24_4"   
##  [433] "M24_5"    "M24_6"    "M25_1"    "M25_2"    "M25_3"    "M25_4"   
##  [439] "M25_5"    "M25_6"    "M26_1"    "M26_2"    "M26_3"    "M26_4"   
##  [445] "M26_5"    "M26_6"    "M27_1"    "M27_2"    "M27_3"    "M27_4"   
##  [451] "M27_5"    "M27_6"    "M28_1"    "M28_2"    "M28_3"    "M28_4"   
##  [457] "M28_5"    "M28_6"    "M29_1"    "M29_2"    "M29_3"    "M29_4"   
##  [463] "M29_5"    "M29_6"    "M30_1"    "M30_2"    "M30_3"    "M30_4"   
##  [469] "M30_5"    "M30_6"    "M31_1"    "M31_2"    "M31_3"    "M31_4"   
##  [475] "M31_5"    "M31_6"    "M32_1"    "M32_2"    "M32_3"    "M32_4"   
##  [481] "M32_5"    "M32_6"    "M33_1"    "M33_2"    "M33_3"    "M33_4"   
##  [487] "M33_5"    "M33_6"    "M34_1"    "M34_2"    "M34_3"    "M34_4"   
##  [493] "M34_5"    "M34_6"    "MSF35_1"  "MSF35_2"  "M37_1"    "M37_2"   
##  [499] "M37_3"    "M37_4"    "M37_5"    "M37_6"    "M38_1"    "M38_2"   
##  [505] "M38_3"    "M38_4"    "M38_5"    "M38_6"    "M39_1"    "M39_2"   
##  [511] "M39_3"    "M39_4"    "M39_5"    "M39_6"    "M40_1"    "M40_2"   
##  [517] "M40_3"    "M40_4"    "M40_5"    "M40_6"    "M41_1"    "M41_2"   
##  [523] "M41_3"    "M41_4"    "M41_5"    "M41_6"    "M42_1"    "M42_2"   
##  [529] "M42_3"    "M42_4"    "M42_5"    "M42_6"    "M43_1"    "M43_2"   
##  [535] "M43_3"    "M43_4"    "M43_5"    "M43_6"    "M44_1"    "M44_2"   
##  [541] "M44_3"    "M44_4"    "M44_5"    "M44_6"    "M45_1"    "M45_2"   
##  [547] "M45_3"    "M45_4"    "M45_5"    "M45_6"    "M46_1"    "M46_2"   
##  [553] "M46_3"    "M46_4"    "M46_5"    "M46_6"    "M47_1"    "M47_2"   
##  [559] "M47_3"    "M47_4"    "M47_5"    "M47_6"    "M48_1"    "M48_2"   
##  [565] "M48_3"    "M48_4"    "M48_5"    "M48_6"    "M49_1"    "M49_2"   
##  [571] "M49_3"    "M49_4"    "M49_5"    "M49_6"    "M50_1"    "M50_2"   
##  [577] "M50_3"    "M50_4"    "M50_5"    "M50_6"    "M51_1"    "M51_2"   
##  [583] "M51_3"    "M51_4"    "M51_5"    "M51_6"    "M52_1"    "M52_2"   
##  [589] "M52_3"    "M52_4"    "M52_5"    "M52_6"    "N1"       "N2"      
##  [595] "N3"       "N4"       "N5"       "N1_1"     "NP1_1"    "N1_2"    
##  [601] "NP2_1"    "N1_3"     "NP3_1"    "N1_4"     "NP4_1"    "N1_5"    
##  [607] "NP5_1"    "N1_6"     "NP6_1"    "N1_7"     "NP7_1"    "N1_8"    
##  [613] "NP8_1"    "N2_1"     "NP1_2"    "N2_2"     "NP2_2"    "N2_3"    
##  [619] "NP3_2"    "N2_4"     "NP4_2"    "N2_5"     "NP5_2"    "N2_6"    
##  [625] "NP6_2"    "N2_7"     "NP7_2"    "N2_8"     "NP8_2"    "N3_1"    
##  [631] "NP1_3"    "N3_2"     "NP2_3"    "N3_3"     "NP3_3"    "N3_4"    
##  [637] "NP4_3"    "N3_5"     "NP5_3"    "N3_6"     "NP6_3"    "N3_7"    
##  [643] "NP7_3"    "N3_8"     "NP8_3"    "N4_1"     "NP1_4"    "N4_2"    
##  [649] "NP2_4"    "N4_3"     "NP3_4"    "N4_4"     "NP4_4"    "N4_5"    
##  [655] "NP5_4"    "N4_6"     "NP6_4"    "N4_7"     "NP7_4"    "N4_8"    
##  [661] "NP8_4"    "N5_1"     "NP1_5"    "N5_2"     "NP2_5"    "N5_3"    
##  [667] "NP3_5"    "N5_4"     "NP4_5"    "N5_5"     "NP5_5"    "N5_6"    
##  [673] "NP6_5"    "N5_7"     "NP7_5"    "N5_8"     "NP8_5"    "O1_1"    
##  [679] "O1_2"     "O1_3"     "O1_4"     "O1_5"     "O1_6"     "O2_1"    
##  [685] "O2_2"     "O2_3"     "O2_4"     "O2_5"     "O2_6"     "O3_1"    
##  [691] "O3_2"     "O3_3"     "O3_4"     "O3_5"     "O3_6"     "O4_1"    
##  [697] "O4_2"     "O4_3"     "O4_4"     "O4_5"     "O4_6"     "O5_1"    
##  [703] "O5_2"     "O5_3"     "O5_4"     "O5_5"     "O5_6"     "O6_1"    
##  [709] "O6_2"     "O6_3"     "O6_4"     "O6_5"     "O6_6"     "O7_1"    
##  [715] "O7_2"     "O7_3"     "O7_4"     "O7_5"     "O7_6"     "__0_E"   
##  [721] "__0_D"    "__0_C"    "__0_B"    "__0_A"    "__0"      "O8_1"    
##  [727] "O8_2"     "O8_3"     "O8_4"     "O8_5"     "O8_6"     "O9_1"    
##  [733] "O9_2"     "O9_3"     "O9_4"     "O9_5"     "O9_6"     "O10_1"   
##  [739] "O10_2"    "O10_3"    "O10_4"    "O10_5"    "O10_6"    "O11_1"   
##  [745] "O11_2"    "O11_3"    "O11_4"    "O11_5"    "O11_6"    "O12_1"   
##  [751] "O12_2"    "O12_3"    "O12_4"    "O12_5"    "O12_6"    "O13_1"   
##  [757] "O13_2"    "O13_3"    "O13_4"    "O13_5"    "O13_6"    "O14_1"   
##  [763] "O14_2"    "O14_3"    "O14_4"    "O14_5"    "O14_6"    "O15_1"   
##  [769] "O15_2"    "O15_3"    "O15_4"    "O15_5"    "O15_6"    "O16_1"   
##  [775] "O16_2"    "O16_3"    "O16_4"    "O16_5"    "O16_6"    "O17_1"   
##  [781] "O17_2"    "O17_3"    "O17_4"    "O17_5"    "O17_6"    "O18_1"   
##  [787] "O18_2"    "O18_3"    "O18_4"    "O18_5"    "O18_6"    "O19_1"   
##  [793] "O19_2"    "O19_3"    "O19_4"    "O19_5"    "O19_6"    "O20_1"   
##  [799] "O20_2"    "O20_3"    "O20_4"    "O20_5"    "O20_6"    "O21_1"   
##  [805] "O21_2"    "O21_3"    "O21_4"    "O21_5"    "O21_6"    "O22_1"   
##  [811] "O22_2"    "O22_3"    "O22_4"    "O22_5"    "O22_6"    "O23_1"   
##  [817] "O23_2"    "O23_3"    "O23_4"    "O23_5"    "O23_6"    "O24_1"   
##  [823] "O24_2"    "O24_3"    "O24_4"    "O24_5"    "O24_6"    "O25_1"   
##  [829] "O25_2"    "O25_3"    "O25_4"    "O25_5"    "O25_6"    "O26_1"   
##  [835] "O26_2"    "O26_3"    "O26_4"    "O26_5"    "O26_6"    "O27_1"   
##  [841] "O27_2"    "O27_3"    "O27_4"    "O27_5"    "O27_6"    "O28_1"   
##  [847] "O28_2"    "O28_3"    "O28_4"    "O28_5"    "O28_6"    "O29_1"   
##  [853] "O29_2"    "O29_3"    "O29_4"    "O29_5"    "O29_6"    "O30_1"   
##  [859] "O30_2"    "O30_3"    "O30_4"    "O30_5"    "O30_6"    "O31_1"   
##  [865] "O31_2"    "O31_3"    "O31_4"    "O31_5"    "O31_6"    "O32_1"   
##  [871] "O32_2"    "O32_3"    "O32_4"    "O32_5"    "O32_6"    "O33_1"   
##  [877] "O33_2"    "O33_3"    "O33_4"    "O33_5"    "O33_6"    "O34_1"   
##  [883] "O34_2"    "O34_3"    "O34_4"    "O34_5"    "O34_6"    "O35_1"   
##  [889] "O35_2"    "O35_3"    "O35_4"    "O35_5"    "O35_6"    "O36_1"   
##  [895] "O36_2"    "O36_3"    "O36_4"    "O36_5"    "O36_6"    "P1"      
##  [901] "P2"       "P3"       "P4"       "P5"       "P1_1"     "PP1_1"   
##  [907] "P1_2"     "PP2_1"    "P1_3"     "PP3_1"    "P1_4"     "PP4_1"   
##  [913] "P1_5"     "PP5_1"    "P1_6"     "PP6_1"    "P1_7"     "PP7_1"   
##  [919] "P1_8"     "PP8_1"    "P2_1"     "PP1_2"    "P2_2"     "PP2_2"   
##  [925] "P2_3"     "PP3_2"    "P2_4"     "PP4_2"    "P2_5"     "PP5_2"   
##  [931] "P2_6"     "PP6_2"    "P2_7"     "PP7_2"    "P2_8"     "PP8_2"   
##  [937] "P3_1"     "PP1_3"    "P3_2"     "PP2_3"    "P3_3"     "PP3_3"   
##  [943] "P3_4"     "PP4_3"    "P3_5"     "PP5_3"    "P3_6"     "PP6_3"   
##  [949] "P3_7"     "PP7_3"    "P3_8"     "PP8_3"    "P4_1"     "PP1_4"   
##  [955] "P4_2"     "PP2_4"    "P4_3"     "PP3_4"    "P4_4"     "PP4_4"   
##  [961] "P4_5"     "PP5_4"    "P4_6"     "PP6_4"    "P4_7"     "PP7_4"   
##  [967] "P4_8"     "PP8_4"    "P5_1"     "PP1_5"    "P5_2"     "PP2_5"   
##  [973] "P5_3"     "PP3_5"    "P5_4"     "PP4_5"    "P5_5"     "PP5_5"   
##  [979] "P5_6"     "PP6_5"    "P5_7"     "PP7_5"    "P5_8"     "PP8_5"   
##  [985] "Q1_1"     "Q1_2"     "Q1_3"     "Q1_4"     "Q1_5"     "Q1_6"    
##  [991] "Q1_7"     "Q1_8"     "Q1_9"     "Q1_10"    "Q1_11"    "Q1_12"   
##  [997] "Q1_13"    "Q1_14"    "Q1_15"    "Q2_1"    
##  [ reached getOption("max.print") -- omitted 684 entries ]
names(tabla_nueva)
## [1] "ciiu_4" "ciiu_3"
#zzff.10 %>% left_join(tabla_nueva, by=c("G1"="ciiu_4"))
zzff.10 <- zzff.10 %>% 
  mutate(ciiu_4 = as.character(.$G1)) %>% 
  left_join(tabla_nueva)
## Joining, by = "ciiu_4"
zzff.10 %>% head
##   V1546  Ano                             A1           A2
##                                            A3                     A4
##             A5       A6       A7                                 A8 A9_0
##   A9_1 A9_2           A10                   B1       B2
##                      B3                           B4 B5_1 B5_2 B5_3 B5_4
##   B5_5 B5_6 B6 B7 B8 B9 B10 B11 D1 D2 D3       D4       D5 E1
##                       E2 E3                                        E4
##            E5        F1 F1_1          F2 F2_1          F3 F3_1          F4
##   F4_1   G1                                                 G1_1 G1_2   G2
##                                                                        G2_1
##   G2_2 G3 G3_1 G3_2 G4 H1_1 H1_2 H1_2_A H1_3 H1_4 H1_5 H1_6 H1_7 H1_8 H1_9
##   H1_10 H1_11 H1_12 H1_13 H1_14 H1_15 H1_16 H1_17 H1_18 H1_19 H1_20 H1_21
##   H2_1 H2_2   H2_3 H2_4 H2_5 H2_6 H2_7 H2_8 H2_9 H2_10 H2_11 H2_12 H2_13
##   H2_14 H2_15 H2_16 H2_17 H2_18 H2_19 H2_20 H2_21 H3_1 H3_2   H3_3 H3_4
##   H3_5 H3_6 H3_7 H3_8 H3_9 H3_10 H3_11 H3_12 H3_13 H3_14 H3_15 H3_16 H3_17
##   H3_18 H3_19 H3_20 H3_21 __2H1_1  __2H1_2  __2H1_3 __2H1_4 __2H1_5
##   __2H1_6 __2H1_7 __2H1_8 __2H1_9 __2H1_10 __2H1_11 __2H1_12 __2H1_13
##   __2H1_14 __2H1_15 __2H2_1  __2H2_2  __2H2_3 __2H2_4 __2H2_5 __2H2_6
##   __2H2_7 __2H2_8 __2H2_9 __2H2_10 __2H2_11 __2H2_12 __2H2_13 __2H2_14
##   __2H2_15 __2H3_1   __2H3_2  __2H3_3 __2H3_4 __2H3_5 __2H3_6 __2H3_7
##   __2H3_8 __2H3_9 __2H3_10 __2H3_11 __2H3_12 __2H3_13 __2H3_14 __2H3_15
##   __2H4_1   __2H4_2  __2H4_3 __2H4_4 __2H4_5 __2H4_6 __2H4_7 __2H4_8
##   __2H4_9 __2H4_10 __2H4_11 __2H4_12 __2H4_13 __2H4_14 __2H4_15     I1_1
##   I1_2   I1_3    I2_1 I2_2 I2_3    I3_1   I3_2  I3_3     I4_1 I4_2 I4_3
##   I5_1    I5_2 I5_3     I6_1 I6_2 I6_3     I7_1    I7_2   I7_3      J1_1
##   J1_2     J2_1 J2_2     J3_1 J3_2      J4_1 J4_2      J5_1 J5_2 J6_1 J6_2
##   J7_1 J7_2 K1_1 K1_2 K1_3 K1_4 K1_5 K1_6 K2_1 K2_2 K2_3 K2_4 K2_5 K2_6
##   K3_1 K3_2   K3_3 K3_4 K3_5      K3_6 K4_1 K4_2     K4_3 K4_4 K4_5 K4_6
##   K5_1 K5_2    K5_3 K5_4 K5_5   K5_6 K6_1 K6_2    K6_3 K6_4 K6_5    K6_6
##   K7_1 K7_2 K7_3 K7_4 K7_5      K7_6 K8_1 K8_2 K8_3 K8_4 K8_5     K8_6
##   K9_1 K9_2     K9_3 K9_4 K9_5      K9_6 L1 L2 L3 L4_1 L4_2 L5 L6 L7 L8 L9
##   L10 L10_1 L10_2       M1_1      M1_2 M1_3      M1_4       M1_5
##         M1_6 M2_1 M2_2 M2_3 M2_4 M2_5 M2_6 M3_1 M3_2 M3_3 M3_4 M3_5 M3_6
##   M4_1      M4_2 M4_3      M4_4     M4_5   M4_6       M5_1      M5_2 M5_3
##        M5_4       M5_5       M5_6 M6_1 M6_2 M6_3 M6_4 M6_5    M6_6 M7_1
##        M7_2 M7_3 M7_4    M7_5    M7_6 M8_1_A M8_2 M8_3 M8_4 M8_5 M8_6 M9_1
##        M9_2 M9_3 M9_4    M9_5    M9_6 MP1 MP2 MP3 MP4 MP5 MP6 M10_1 M10_2
##   M10_3 M10_4 M10_5 M10_6 M11_1 M11_2 M11_3 M11_4 M11_5 M11_6 M12_1 M12_2
##   M12_3 M12_4 M12_5 M12_6 M8_1 M13_2 M13_3 M13_4 M13_5 M13_6 M14_1 M14_2
##   M14_3 M14_4 M14_5 M14_6 M15_1 M15_2 M15_3 M15_4 M15_5 M15_6 M16_1
##       M16_2 M16_3 M16_4 M16_5 M16_6 M17_1 M17_2 M17_3 M17_4 M17_5 M17_6
##   M18_1     M18_2 M18_3 M18_4 M18_5 M18_6 M19_1 M19_2 M19_3 M19_4 M19_5
##   M19_6 M20_1 M20_2 M20_3 M20_4 M20_5 M20_6 M21_1 M21_2 M21_3 M21_4 M21_5
##   M21_6 M22_1 M22_2 M22_3 M22_4 M22_5 M22_6 M23_1 M23_2 M23_3 M23_4 M23_5
##   M23_6 M24_1 M24_2 M24_3 M24_4 M24_5 M24_6 M25_1 M25_2 M25_3 M25_4 M25_5
##   M25_6 M26_1 M26_2 M26_3 M26_4 M26_5 M26_6 M27_1 M27_2 M27_3 M27_4 M27_5
##   M27_6 M28_1 M28_2 M28_3 M28_4 M28_5 M28_6 M29_1 M29_2 M29_3 M29_4 M29_5
##   M29_6 M30_1 M30_2 M30_3 M30_4 M30_5 M30_6 M31_1 M31_2 M31_3 M31_4 M31_5
##   M31_6 M32_1 M32_2 M32_3 M32_4 M32_5 M32_6 M33_1 M33_2 M33_3 M33_4 M33_5
##   M33_6 M34_1 M34_2 M34_3 M34_4 M34_5 M34_6 MSF35_1 MSF35_2 M37_1 M37_2
##   M37_3 M37_4 M37_5 M37_6 M38_1 M38_2 M38_3 M38_4 M38_5 M38_6 M39_1 M39_2
##   M39_3 M39_4 M39_5 M39_6 M40_1 M40_2 M40_3 M40_4 M40_5 M40_6 M41_1 M41_2
##   M41_3 M41_4 M41_5 M41_6 M42_1 M42_2 M42_3 M42_4 M42_5 M42_6 M43_1 M43_2
##   M43_3 M43_4 M43_5 M43_6 M44_1 M44_2 M44_3 M44_4 M44_5 M44_6 M45_1 M45_2
##   M45_3 M45_4 M45_5 M45_6 M46_1 M46_2 M46_3 M46_4 M46_5 M46_6 M47_1 M47_2
##   M47_3   M47_4 M47_5 M47_6 M48_1    M48_2 M48_3    M48_4 M48_5  M48_6
##   M49_1 M49_2 M49_3 M49_4 M49_5 M49_6 M50_1    M50_2 M50_3   M50_4 M50_5
##     M50_6 M51_1    M51_2 M51_3    M51_4 M51_5   M51_6      M52_1     M52_2
##   M52_3      M52_4      M52_5      M52_6          N1      N2 N3 N4      N5
##          N1_1 NP1_1        N1_2 NP2_1        N1_3 NP3_1        N1_4 NP4_1
##          N1_5 NP5_1        N1_6 NP6_1        N1_7 NP7_1        N1_8 NP8_1
##             N2_1 NP1_2        N2_2 NP2_2        N2_3 NP3_2        N2_4
##   NP4_2        N2_5 NP5_2        N2_6 NP6_2        N2_7 NP7_2        N2_8
##   NP8_2        N3_1 NP1_3        N3_2 NP2_3        N3_3 NP3_3        N3_4
##   NP4_3        N3_5 NP5_3        N3_6 NP6_3        N3_7 NP7_3        N3_8
##   NP8_3        N4_1 NP1_4        N4_2 NP2_4        N4_3 NP3_4        N4_4
##   NP4_4        N4_5 NP5_4        N4_6 NP6_4        N4_7 NP7_4        N4_8
##   NP8_4        N5_1 NP1_5        N5_2 NP2_5        N5_3 NP3_5        N5_4
##   NP4_5        N5_5 NP5_5        N5_6 NP6_5        N5_7 NP7_5        N5_8
##   NP8_5      O1_1      O1_2       O1_3      O1_4      O1_5      O1_6 O2_1
##   O2_2 O2_3 O2_4 O2_5 O2_6 O3_1 O3_2 O3_3 O3_4 O3_5 O3_6 O4_1      O4_2
##   O4_3 O4_4     O4_5   O4_6      O5_1      O5_2       O5_3      O5_4
##        O5_5      O5_6 O6_1 O6_2 O6_3 O6_4 O6_5    O6_6 O7_1     O7_2 O7_3
##   O7_4    O7_5      O7_6 __0_E    __0_D __0_C __0_B   __0_A       __0
##       O8_1    O8_2 O8_3   O8_4 O8_5 O8_6 O9_1 O9_2   O9_3     O9_4 O9_5
##      O9_6 O10_1     O10_2 O10_3    O10_4 O10_5      O10_6 O11_1   O11_2
##      O11_3   O11_4 O11_5 O11_6 O12_1   O12_2 O12_3   O12_4  O12_5 O12_6
##   O13_1     O13_2 O13_3    O13_4 O13_5 O13_6 O14_1   O14_2 O14_3    O14_4
##   O14_5 O14_6 O15_1     O15_2 O15_3  O15_4 O15_5 O15_6 O16_1     O16_2
##    O16_3    O16_4 O16_5   O16_6 O17_1 O17_2 O17_3  O17_4 O17_5 O17_6 O18_1
##    O18_2 O18_3     O18_4 O18_5 O18_6 O19_1 O19_2 O19_3    O19_4     O19_5
##       O19_6 O20_1 O20_2 O20_3 O20_4    O20_5 O20_6 O21_1 O21_2 O21_3 O21_4
##   O21_5    O21_6 O22_1  O22_2 O22_3   O22_4   O22_5 O22_6 O23_1   O23_2
##   O23_3 O23_4   O23_5  O23_6    O24_1     O24_2    O24_3     O24_4
##       O24_5      O24_6 O25_1 O25_2 O25_3 O25_4 O25_5 O25_6 O26_1 O26_2
##   O26_3 O26_4 O26_5 O26_6 O27_1 O27_2 O27_3 O27_4 O27_5 O27_6 O28_1 O28_2
##   O28_3 O28_4 O28_5 O28_6 O29_1 O29_2 O29_3 O29_4 O29_5 O29_6 O30_1 O30_2
##   O30_3   O30_4   O30_5   O30_6 O31_1    O31_2 O31_3   O31_4 O31_5  O31_6
##   O32_1  O32_2 O32_3  O32_4 O32_5   O32_6 O33_1 O33_2 O33_3  O33_4
##      O33_5     O33_6 O34_1  O34_2 O34_3   O34_4  O34_5 O34_6 O35_1
##      O35_2 O35_3   O35_4    O35_5     O35_6     O36_1      O36_2
##        O36_3     O36_4     O36_5      O36_6        P1        P2 P3 P4
##           P5        P1_1 PP1_1        P1_2 PP2_1        P1_3 PP3_1
##          P1_4 PP4_1        P1_5 PP5_1        P1_6 PP6_1        P1_7 PP7_1
##          P1_8 PP8_1           P2_1 PP1_2        P2_2 PP2_2        P2_3
##   PP3_2        P2_4 PP4_2        P2_5 PP5_2        P2_6 PP6_2        P2_7
##   PP7_2        P2_8 PP8_2        P3_1 PP1_3        P3_2 PP2_3        P3_3
##   PP3_3        P3_4 PP4_3        P3_5 PP5_3        P3_6 PP6_3        P3_7
##   PP7_3        P3_8 PP8_3        P4_1 PP1_4        P4_2 PP2_4        P4_3
##   PP3_4        P4_4 PP4_4        P4_5 PP5_4        P4_6 PP6_4        P4_7
##   PP7_4        P4_8 PP8_4           P5_1 PP1_5          P5_2 PP2_5
##          P5_3 PP3_5        P5_4 PP4_5        P5_5 PP5_5        P5_6 PP6_5
##          P5_7 PP7_5        P5_8 PP8_5 Q1_1 Q1_2 Q1_3 Q1_4        Q1_5 Q1_6
##   Q1_7 Q1_8 Q1_9 Q1_10 Q1_11 Q1_12 Q1_13 Q1_14 Q1_15 Q2_1 Q2_2 Q2_3 Q2_4
##          Q2_5 Q2_6 Q2_7 Q2_8 Q2_9 Q2_10 Q2_11 Q2_12 Q2_13 Q2_14 Q2_15 Q3_1
##   Q3_2 Q3_3 Q3_4        Q3_5 Q3_6 Q3_7 Q3_8 Q3_9 Q3_10 Q3_11 Q3_12 Q3_13
##   Q3_14 Q3_15    Q4_1 Q4_2    Q4_3 Q4_4        Q4_5 Q4_6    Q4_7 Q4_8 Q4_9
##   Q4_10 Q4_11  Q4_12   Q4_13 Q4_14   Q4_15     Q5_1 Q5_2    Q5_3   Q5_4
##          Q5_5 Q5_6    Q5_7 Q5_8 Q5_9 Q5_10 Q5_11   Q5_12   Q5_13 Q5_14
##      Q5_15        Q6_1 Q6_2      Q6_3     Q6_4        Q6_5 Q6_6      Q6_7
##   Q6_8 Q6_9 Q6_10 Q6_11   Q6_12     Q6_13     Q6_14       Q6_15    Q7_1
##   Q7_2    Q7_3 Q7_4        Q7_5 Q7_6    Q7_7 Q7_8 Q7_9 Q7_10 Q7_11 Q7_12
##    Q7_13  Q7_14   Q7_15     Q8_1 Q8_2   Q8_3    Q8_4        Q8_5 Q8_6
##      Q8_7 Q8_8 Q8_9 Q8_10 Q8_11 Q8_12   Q8_13   Q8_14    Q8_15      Q9_1
##   Q9_2    Q9_3 Q9_4        Q9_5 Q9_6    Q9_7 Q9_8 Q9_9 Q9_10 Q9_11 Q9_12
##      Q9_13    Q9_14     Q9_15      Q10_1 Q10_2   Q10_3 Q10_4       Q10_5
##   Q10_6   Q10_7 Q10_8 Q10_9 Q10_10 Q10_11 Q10_12    Q10_13    Q10_14
##       Q10_15 Q11_1 Q11_2 Q11_3 Q11_4       Q11_5 Q11_6 Q11_7 Q11_8 Q11_9
##   Q11_10 Q11_11 Q11_12 Q11_13 Q11_14 Q11_15 Q12_1 Q12_2 Q12_3 Q12_4
##         Q12_5 Q12_6 Q12_7 Q12_8 Q12_9 Q12_10 Q12_11 Q12_12 Q12_13 Q12_14
##   Q12_15    Q13_1 Q13_2    Q13_3 Q13_4       Q13_5 Q13_6    Q13_7 Q13_8
##   Q13_9 Q13_10 Q13_11   Q13_12 Q13_13 Q13_14   Q13_15     Q14_1 Q14_2
##     Q14_3 Q14_4       Q14_5 Q14_6   Q14_7 Q14_8 Q14_9 Q14_10 Q14_11 Q14_12
##    Q14_13  Q14_14    Q14_15 Q15_1 Q15_2 Q15_3 Q15_4       Q15_5 Q15_6
##   Q15_7 Q15_8 Q15_9 Q15_10 Q15_11 Q15_12 Q15_13 Q15_14 Q15_15 Q16_1 Q16_2
##   Q16_3 Q16_4       Q16_5 Q16_6 Q16_7 Q16_8 Q16_9 Q16_10 Q16_11 Q16_12
##   Q16_13 Q16_14 Q16_15   Q17_1 Q17_2 Q17_3 Q17_4       Q17_5 Q17_6 Q17_7
##   Q17_8 Q17_9 Q17_10 Q17_11  Q17_15    Q18_1 Q18_2    Q18_3 Q18_4
##         Q18_5 Q18_6    Q18_7 Q18_8 Q18_9 Q18_10 Q18_11   Q18_12 Q18_13
##   Q18_14  Q18_15 QM1 QM2 RP1_1
##                                               RP1_2       RP1_3 RP2_1
##                                                        RP2_2     RP2_3
##   RP3_1                   RP3_2     RP3_3 RP4_1 RP4_2 RP4_3 RP5_1 RP5_2
##   RP5_3 RP6_1 RP6_2 RP6_3 RP7_1 RP7_2 RP7_3 RP8_1 RP8_2 RP8_3         rpt
##   RI1_1 RI1_2      RI1_3 RI2_1 RI2_2    RI2_3 RI3_1 RI3_2     RI3_3 RI4_1
##   RI4_2     RI4_3 RI5_1 RI5_2     RI5_3 RI6_1 RI6_2    RI6_3 RI7_1 RI7_2
##      RI7_3 RI8_1 RI8_2     RI8_3        rit    T1_1    T1_2      T2_1
##        T2_2     T3_1     T3_2       T4_1       T4_2     T5_1     T5_2
##        T6_1      T6_2       T7_1       T7_2     U1_1     U1_2   U1_3
##     U1_4 U1_5 U1_6 U2_1 U2_2 U2_3 U2_4 U2_5 U2_6 U3_1 U3_2 U3_3 U3_4 U3_5
##   U3_6 U4_1 U4_2 U4_3 U4_4 U4_5 U4_6 U5_1 U5_2 U5_3 U5_4 U5_5 U5_6
##        U6_1      U6_2      U6_3      U6_4 U6_5 U6_6      U7_1      U7_2
##        U7_3    U7_4 U7_5 U7_6       U8_1    U8_2        U8_3     U8_4 U8_5
##   U8_6      U9_1       U9_2      U9_3    U9_4 U9_5 U9_6      U10_1
##        U10_2      U10_3    U10_4 U10_5 U10_6 U11_1 U11_2 U11_3 U11_4 U11_5
##   U11_6 U12_1 U12_2 U12_3 U12_4 U12_5 U12_6 U13_1 U13_2 U13_3 U13_4 U13_5
##   U13_6    U14_1     U14_2     U14_3     U14_4 U14_5 U14_6    U15_1
##      U15_2    U15_3    U15_4 U15_5 U15_6    U16_1     U16_2     U16_3
##       U16_4 U16_5 U16_6       U17_1       U17_2      U17_3     U17_4 U17_5
##   U17_6       U18_1       U18_2      U18_3     U18_4 U18_5 U18_6
##         U19_1       U19_2       U19_3     U19_4 U19_5 U19_6 V1_1 V1_2 V1_3
##   V1_4 V1_5 V1_6 V2_1       V2_2       V2_3 V2_4 V2_5 V2_6       V3_1
##         V3_2      V3_3      V3_4 V3_5 V3_6       V4_1     V4_2        V4_3
##   V4_4 V4_5 V4_6     V5_1     V5_2     V5_3     V5_4 V5_5 V5_6      V6_1
##       V6_2       V6_3    V6_4 V6_5 V6_6      V7_1      V7_2      V7_3
##        V7_4 V7_5 V7_6       V8_1       V8_2        V8_3      V8_4 V8_5
##   V8_6 V9_1 V9_2 V9_3 V9_4 V9_5 V9_6 V10_1 V10_2 V10_3 V10_4 V10_5 V10_6
##     V11_1 V11_2    V11_3 V11_4 V11_5 V11_6     V12_1     V12_2    V12_3
##      V12_4 V12_5 V12_6    V13_1    V13_2    V13_3    V13_4 V13_5 V13_6
##       V14_1     V14_2     V14_3     V14_4 V14_5 V14_6      V15_1
##        V15_2       V15_3     V15_4 V15_5 V15_6        W1_1        W1_2
##       W2_1     W2_2        W3_1        W3_2      W4_1      W4_2
##          W5_1        W5_2        W6_1        W6_2       W7_1       W7_2
##   W8_1 W8_2       W9_1       W9_2 W10_1    W10_2 X1         X2         X3
##                             X4
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             Y1
##   D1NUMERO D2NUMERO D3NUMERO CONTROL_TUSU    ZonaFranca
##                RazonSocial Tipodeusuario Indirectode
##                        A6RazonSocial   A9Depto     A10Telefono
##           A11Fax                    A12mail               A12_0mail2
##                              A12_1mail3                A12_2mail4
##               A12_3mail5           A13Informante         A14ZonaFranca
##   A15CodigoAZZF
##                                                                                                     OBS_1
##   letra   REM_TOT APORTE_TOT GAS_NO_DEP PO_TOT     ING_IND ING_COMERCIO
##   ING_SERV_NO_FIN ING_SERV_PROF ING_SERV_INFORMATICOS ING_SERV_SEGUROS
##   ING_SERV_FIN ING_OTROS     ING_TOT ING_OTROS_EXTRA    GAS_IND   GAS_COM
##     GAS_OPER GAS_SEGUROS GAS_SERV_FIN   GAS_FIN GAS_OTROS GAS_OTROS_EXTRA
##      GAS_TOT  AMORT_TOT         re PROD_CTA_PROPIA   TOT_FBK   POR_TR
##   VBP_INDUSTRIA VBP_COMERCIO VBP_SERVICIOS VBP_FINANCIERO VBP_OT_FUENTES
##   VBP_CONSTR_CP         vbp         ci        vab         ee    GAS_MAT
##       MP_CI    CI_VBP   VAB_VBP    REM_VAB tramo  VBP_TR     VBP_STR
##     VAB_TR    VAB_STR        EE_TR     EE_STR      VBP_10     VAB_10
##        EE_10 prodpercap outlier       VBP_N       EE_N      CI_ZF
##       CI_ZNF     CI_EXT     CI_OT OUTLIER_C       CI_N   cipercap
##       AMORT2      VBP_N2 ver sector inactivas     expoind   expocom
##   exposerv expofin       EXPO2  DEST_IND1  DEST_IND2  DEST_IND3 DEST_IND4
##   DEST_IND5 DEST_IND6  DEST_IND7  DEST_IND8 DEST_COM1 DEST_COM2 DEST_COM3
##   DEST_COM4 DEST_COM5 DEST_COM6 DEST_COM7 DEST_COM8 DEST_SERV1 DEST_SERV2
##   DEST_SERV3 DEST_SERV4 DEST_SERV5 DEST_SERV6 DEST_SERV7 DEST_SERV8
##   DEST_FINAN1 DEST_FINAN2 DEST_FINAN3 DEST_FINAN4 DEST_FINAN5 DEST_FINAN6
##   DEST_FINAN7 DEST_FINAN8 ventaslog PO_PYS PO_D PO_E PO_O PO_PYT PO_PRIM
##   PO_SEC PO_TER PO_POST ciiu_4 ciiu_3
##  [ reached getOption("max.print") -- omitted 6 rows ]

Tabla con secciones

Abrimos la tabla y la procesamos

file <- "data/ISIC_Rev_4_spanish_structure.txt"
t <- tabla.ciiu4(file)
## Parsed with column specification:
## cols(
##   Code = col_character(),
##   Title = col_character()
## )
t
## # A tibble: 766 x 3
##    ciiu4 desc.ciiu4                                                 secci…
##    <chr> <chr>                                                      <chr> 
##  1 A     Agricultura, ganadería, silvicultura y pesca               A     
##  2 01    Agricultura, ganadería, caza y actividades de servicios c… A     
##  3 011   Cultivo de plantas no perennes                             A     
##  4 0111  Cultivo de cereales (excepto arroz), legumbres y semillas… A     
##  5 0112  Cultivo de arroz                                           A     
##  6 0113  Cultivo de hortalizas y melones, raíces y tubérculos       A     
##  7 0114  Cultivo de cańa de azúcar                                  A     
##  8 0115  Cultivo de tabaco                                          A     
##  9 0116  Cultivo de plantas de fibra                                A     
## 10 0119  Cultivo de otras plantas no perennes                       A     
## # ... with 756 more rows

Hacer el join

De deberes tabla.ciiu =======================================================

Abrimos la tabla

file <- "data/ISIC_Rev_4_spanish_structure.txt"
t <- tabla.ciiu4(file)
## Parsed with column specification:
## cols(
##   Code = col_character(),
##   Title = col_character()
## )

=====================================================

Tidyr

  • Cambia la forma de un data frame (wide a long).
  • Las librerías de tidyverse (ggplot) esperan los datos en un formato "canónico"

Formato Canónico

title

Spread

Spread a key-value pair across multiple columns.

Sirve para cuando tengo nombres de variables como valores.

library(tidyverse)
head(table2)
## # A tibble: 6 x 4
##   country      year type           count
##   <chr>       <int> <chr>          <int>
## 1 Afghanistan  1999 cases            745
## 2 Afghanistan  1999 population  19987071
## 3 Afghanistan  2000 cases           2666
## 4 Afghanistan  2000 population  20595360
## 5 Brazil       1999 cases          37737
## 6 Brazil       1999 population 172006362

Spread

spread(table2, key=type, value=count )
## # A tibble: 6 x 4
##   country      year  cases population
## * <chr>       <int>  <int>      <int>
## 1 Afghanistan  1999    745   19987071
## 2 Afghanistan  2000   2666   20595360
## 3 Brazil       1999  37737  172006362
## 4 Brazil       2000  80488  174504898
## 5 China        1999 212258 1272915272
## 6 China        2000 213766 1280428583

Spread

spread

Gather

Gather columns into key-value pairs.

head(table4a)
## # A tibble: 3 x 3
##   country     `1999` `2000`
##   <chr>        <int>  <int>
## 1 Afghanistan    745   2666
## 2 Brazil       37737  80488
## 3 China       212258 213766
head(table4b)
## # A tibble: 3 x 3
##   country         `1999`     `2000`
##   <chr>            <int>      <int>
## 1 Afghanistan   19987071   20595360
## 2 Brazil       172006362  174504898
## 3 China       1272915272 1280428583
t4a <- gather(table4a, `1999`, `2000`, key="year", value="cases")
t4b <- gather(table4b, `1999`, `2000`, key="year", value="population")
left_join(t4a, t4b)
## Joining, by = c("country", "year")
## # A tibble: 6 x 4
##   country     year   cases population
##   <chr>       <chr>  <int>      <int>
## 1 Afghanistan 1999     745   19987071
## 2 Brazil      1999   37737  172006362
## 3 China       1999  212258 1272915272
## 4 Afghanistan 2000    2666   20595360
## 5 Brazil      2000   80488  174504898
## 6 China       2000  213766 1280428583

¿Cómo interpretarían key y value?

Gather

gather

Votacion

Un experimento de votación con varios "grupos" (hawthorne, civicduty, neighbors, self)

gerber <- readRDS("data/gerber.rds")
str(gerber)
## 'data.frame':    344084 obs. of  8 variables:
##  $ sex      : int  0 1 1 1 0 1 0 0 1 0 ...
##  $ yob      : int  1941 1947 1982 1950 1951 1959 1956 1981 1968 1967 ...
##  $ voting   : int  0 0 1 1 1 1 1 0 0 0 ...
##  $ hawthorne: int  0 0 1 1 1 0 0 0 0 0 ...
##  $ civicduty: int  1 1 0 0 0 0 0 0 0 0 ...
##  $ neighbors: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ self     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ control  : int  0 0 0 0 0 1 1 1 1 1 ...

Quiero sacar la proporción de cuántos votan por grupo. Votacion ==================================================== ¿Cuál sería la forma tidy de este juego de datos?

gerber_tidy <- gerber %>% 
  gather(group, sacar, hawthorne, civicduty, neighbors, self) 
head(gerber_tidy)
##   sex  yob voting control     group sacar
## 1   0 1941      0       0 hawthorne     0
## 2   1 1947      0       0 hawthorne     0
## 3   1 1982      1       0 hawthorne     1
## 4   1 1950      1       0 hawthorne     1
## 5   0 1951      1       0 hawthorne     1
## 6   1 1959      1       1 hawthorne     0

Votacion

gerber_tidy %>%
  filter(sacar == 1) %>%
  select(-sacar) %>%
  group_by(group) %>%
  summarize(mean_voting = mean(voting))
## # A tibble: 4 x 2
##   group     mean_voting
##   <chr>           <dbl>
## 1 civicduty       0.315
## 2 hawthorne       0.322
## 3 neighbors       0.378
## 4 self            0.345

Tangente

Notar la utilidad del pipe (%>%) en este caso.

RPAE

Abrir archivo

marco_rpae <- readRDS('/home/rlabuonora/bases/confidencial/marco_rpae.rds')
names(marco_rpae)
##  [1] "RUT"              "INE"              "RSOCIAL"         
##  [4] "CIIU4_2011"       "PO_2011"          "MIMP_2011"       
##  [7] "VTAS_2011"        "Intervalo_Ventas" "DEPTO_2011"      
## [10] "CIIU4_2012"       "PO_2012"          "MIMP_2012"       
## [13] "VTAS_2012"        "DEPTO_2012"       "CIIU4_2013"      
## [16] "PO_2013"          "MIMP_2013"        "VTAS_2013"       
## [19] "DEPTO_2013"       "CIIU4_2014"       "PO_2014"         
## [22] "MIMP_2014"        "VTAS_2014"        "DEPTO_2014"      
## [25] "DEPTO_2015"       "PO_2015"          "MIMP_2015"       
## [28] "CIIU4_2015"       "DEPTO_2016"       "PO_2016"         
## [31] "MIMP_2016"        "CIIU4_2016"       "PrimarioÚltimo"  
## [34] "NOMBRE"           "DEPTO_UTE"        "NUMDEP"          
## [37] "DEPTO11"          "DEPTO12"          "DEPTO13"         
## [40] "DEPTO14"          "DEPTO15"          "DEPTO16"         
## [43] "DEPTOUTE"         "DEPTO"            "DEPTO_REV"       
## [46] "filter_$"         "DIV_11"           "DIV_12"          
## [49] "DIV_13"           "DIV_14"           "DIV_15"          
## [52] "DIV_16"

Seleccionar campos:

  • rut, ine, razon_social, depto, ventas, personal_ocupado
marco_rpae <- marco_rpae %>% 
  rename(rut=RUT, nro_ine = INE, razon_social=RSOCIAL, depto=DEPTO) %>%
  select(rut, nro_ine, razon_social, depto, matches("_20\\d{2}$"), -matches("DEPTO_"))

nombre de variables en minúscula

names(marco_rpae) <- tolower(names(marco_rpae))

Tidy - Paso 1

Gather con todas las variables que terminan en cuatro dígitos.

# tidy con los años
marco_rpae_tidy <- marco_rpae %>% 
  gather(var, val, matches("_\\d{4}$"))

Paso 2 - Separar

marco_rpae_tidy <- marco_rpae_tidy %>%
   separate(var, into=c("variable", "year"))
head(marco_rpae_tidy)
## # A tibble: 6 x 7
##       rut nro_ine razon_social                   depto variab… year    val
##     <dbl>   <dbl> <chr>                          <dbl> <chr>   <chr> <dbl>
## 1   67347     859 Colegio Liceo Beata Imelda      10.0 ciiu4   2011   8510
## 2   70340  190505 Raffo Puppo Teresita           NaN   ciiu4   2011    NaN
## 3   73946  847387 Schandy Gabarda Thomas          10.0 ciiu4   2011    NaN
## 4 1004845  818971 Gerona Milans Beatriz Alba      10.0 ciiu4   2011    NaN
## 5 1007096       0 De Castro Morros Marta Teresa   10.0 ciiu4   2011    NaN
## 6 1007273  798452 Mosteiro Cousillas Maria Caro…  10.0 ciiu4   2011    NaN

Paso 3 - Spread

marco_rpae_tidy <- marco_rpae_tidy %>%
    spread(variable, val)
head(marco_rpae_tidy)
## # A tibble: 6 x 9
##       rut nro_ine razon_social        depto year  ciiu4   mimp    po  vtas
##     <dbl>   <dbl> <chr>               <dbl> <chr> <dbl>  <dbl> <dbl> <dbl>
## 1   67347     859 Colegio Liceo Beat…  10.0 2011   8510 701207  77.9     0
## 2   70340  190505 Raffo Puppo Teresi… NaN   2011    NaN    NaN NaN     NaN
## 3   73946  847387 Schandy Gabarda Th…  10.0 2011    NaN    NaN NaN     NaN
## 4 1004845  818971 Gerona Milans Beat…  10.0 2011    NaN    NaN NaN     NaN
## 5 1007096       0 De Castro Morros M…  10.0 2011    NaN    NaN NaN     NaN
## 6 1007273  798452 Mosteiro Cousillas…  10.0 2011    NaN    NaN NaN     NaN

Cambiar NaN por NA

sacarNans <- function(x) {
  if_else(is.nan((x)), NA_real_, x)
}

marco_rpae_tidy <- marco_rpae_tidy %>%
  mutate_if(is.numeric, funs(sacarNans(.)))

marco_rpae_tidy
## # A tibble: 2,951,220 x 9
##        rut nro_ine razon_social       depto year  ciiu4   mimp    po  vtas
##      <dbl>   <dbl> <chr>              <dbl> <chr> <dbl>  <dbl> <dbl> <dbl>
##  1   67347     859 Colegio Liceo Bea… 10.0  2011   8510 701207  77.9     0
##  2   70340  190505 Raffo Puppo Teres… NA    2011     NA     NA  NA      NA
##  3   73946  847387 Schandy Gabarda T… 10.0  2011     NA     NA  NA      NA
##  4 1004845  818971 Gerona Milans Bea… 10.0  2011     NA     NA  NA      NA
##  5 1007096       0 De Castro Morros … 10.0  2011     NA     NA  NA      NA
##  6 1007273  798452 Mosteiro Cousilla… 10.0  2011     NA     NA  NA      NA
##  7 1007528  709676 Sanjuan Martinez … NA    2011     NA     NA  NA      NA
##  8 1012417  709678 Maestro Irigaray … 10.0  2011     NA     NA  NA      NA
##  9 1015555  709679 Cesar Cardoso Cam… 10.0  2011     NA     NA  NA      NA
## 10 1032308  798453 Goldschmidt Tresz…  9.00 2011     NA     NA  NA      NA
## # ... with 2,951,210 more rows

Exportaciones desde ZZFF

Ver el código para sacar las exportaciones hacia Uruguay.

Ejercicio

Deberes - Swirl

# install.pacakges("swirl")
library(swirl)
install_course("Getting and Cleaning Data")