Data frames

Creación de un data frame

# Pool of day-name abbreviations, Monday through Sunday.
dias <- c("Lun", "Mar", "Mie", "Jue", "Vie", "Sab", "Dom")
# 1000 uniform draws in (0, 7), shifted by one and truncated, give indices
# 1..7 that are used to look up random day names.
mdias <- dias[as.integer(1 + runif(1000, min = 0, max = 7))]
fdias <- factor(mdias)
# 20-row data frame: a factor, an alternating logical and a normal sample
# centred on 5.
df <- data.frame(
  Dia = fdias[seq_len(20)],
  Estimado = rep(c(TRUE, FALSE), times = 10),
  Lectura = rnorm(20, mean = 5)
)
head(df)
##   Dia Estimado  Lectura
## 1 Sab     TRUE 5.549506
## 2 Dom    FALSE 5.008355
## 3 Mie     TRUE 4.626258
## 4 Vie    FALSE 6.232892
## 5 Mar     TRUE 5.117368
## 6 Mar    FALSE 5.289064
# length() of a data frame is its number of columns.
c(length(df), ncol(df), nrow(df))
## [1]  3  3 20
# Data frame with typed columns but no rows.
df2 <- data.frame(
  Dia = numeric(0),
  Estimado = logical(0),
  Lectura = numeric(0)
)
df2
## [1] Dia      Estimado Lectura 
## <0 rows> (or 0-length row.names)
# Data frame preallocated with 10 rows of default values (0 / FALSE).
df3 <- data.frame(
  Dia = numeric(10),
  Estimado = logical(10),
  Lectura = numeric(10)
)
df3
##    Dia Estimado Lectura
## 1    0    FALSE       0
## 2    0    FALSE       0
## 3    0    FALSE       0
## 4    0    FALSE       0
## 5    0    FALSE       0
## 6    0    FALSE       0
## 7    0    FALSE       0
## 8    0    FALSE       0
## 9    0    FALSE       0
## 10   0    FALSE       0

Acceder al contenido de un data frame

df[5, 3] # Third column of the fifth row
## [1] 5.117368
df[5, ] # Entire fifth row
##   Dia Estimado  Lectura
## 5 Mar     TRUE 5.117368
df[, 3] # Entire third column
##  [1] 5.549506 5.008355 4.626258 6.232892 5.117368 5.289064 5.705650 3.585128
##  [9] 5.352827 5.650393 5.548925 6.718259 4.782671 5.347531 4.107657 4.146496
## [17] 4.019923 5.710751 4.252564 2.435578
df[-c(3, 6), ] # Everything except rows 3 and 6
##    Dia Estimado  Lectura
## 1  Sab     TRUE 5.549506
## 2  Dom    FALSE 5.008355
## 4  Vie    FALSE 6.232892
## 5  Mar     TRUE 5.117368
## 7  Vie     TRUE 5.705650
## 8  Lun    FALSE 3.585128
## 9  Sab     TRUE 5.352827
## 10 Mie    FALSE 5.650393
## 11 Sab     TRUE 5.548925
## 12 Lun    FALSE 6.718259
## 13 Vie     TRUE 4.782671
## 14 Dom    FALSE 5.347531
## 15 Dom     TRUE 4.107657
## 16 Sab    FALSE 4.146496
## 17 Dom     TRUE 4.019923
## 18 Dom    FALSE 5.710751
## 19 Dom     TRUE 4.252564
## 20 Sab    FALSE 2.435578
# A column extracted by name comes back as a plain vector.
df$Lectura
##  [1] 5.549506 5.008355 4.626258 6.232892 5.117368 5.289064 5.705650 3.585128
##  [9] 5.352827 5.650393 5.548925 6.718259 4.782671 5.347531 4.107657 4.146496
## [17] 4.019923 5.710751 4.252564 2.435578
# Element-wise comparison yields a logical mask, one value per row.
df$Estimado == FALSE
##  [1] FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE
## [13] FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE
# Day and reading of every row that was not estimated
df[!df$Estimado, c("Dia", "Lectura")]
##    Dia  Lectura
## 2  Dom 5.008355
## 4  Vie 6.232892
## 6  Mar 5.289064
## 8  Lun 3.585128
## 10 Mie 5.650393
## 12 Lun 6.718259
## 14 Dom 5.347531
## 16 Sab 4.146496
## 18 Dom 5.710751
## 20 Sab 2.435578
# Additionally discard the rows whose reading is 3 or lower
df[!df$Estimado & df$Lectura > 3, c("Dia", "Lectura")]
##    Dia  Lectura
## 2  Dom 5.008355
## 4  Vie 6.232892
## 6  Mar 5.289064
## 8  Lun 3.585128
## 10 Mie 5.650393
## 12 Lun 6.718259
## 14 Dom 5.347531
## 16 Sab 4.146496
## 18 Dom 5.710751
# The same cell (row 15, column Dia) written through two notations:
df[15, 1] <- "Vie"   # matrix-style [row, column] indexing
df$Dia[15] <- "Vie"  # column by name, then element by position
df[12:17, ]
##    Dia Estimado  Lectura
## 12 Lun    FALSE 6.718259
## 13 Vie     TRUE 4.782671
## 14 Dom    FALSE 5.347531
## 15 Vie     TRUE 4.107657
## 16 Sab    FALSE 4.146496
## 17 Dom     TRUE 4.019923

Agregar filas y columnas a un data frame

Adición de nuevas filas

str(df)
## 'data.frame':    20 obs. of  3 variables:
##  $ Dia     : Factor w/ 7 levels "Dom","Jue","Lun",..: 6 1 5 7 4 4 7 3 6 5 ...
##  $ Estimado: logi  TRUE FALSE TRUE FALSE TRUE FALSE ...
##  $ Lectura : num  5.55 5.01 4.63 6.23 5.12 ...
# Pitfall: c() coerces its arguments to one common type (character here), so
# assigning such a vector as a row turns the logical and numeric columns into
# character (Dia stays a factor because "Vie" is one of its levels).
# The demonstration runs on a scratch copy so that `df` keeps its column
# types for the rest of the session.
df_tmp <- df
df_tmp[nrow(df_tmp) + 1, ] <- c("Vie", FALSE, 5)
str(df_tmp)
## 'data.frame':    21 obs. of  3 variables:
##  $ Dia     : Factor w/ 7 levels "Dom","Jue","Lun",..: 6 1 5 7 4 4 7 3 6 5 ...
##  $ Estimado: chr  "TRUE" "FALSE" "TRUE" "FALSE" ...
##  $ Lectura : chr  "5.54950574821885" "5.00835464449159" "4.62625769663325" "6.23289217295424" ...
# Correct approach 1: supply the new row as a data frame, which carries one
# type per column, so the existing column types are preserved.
df[nrow(df) + 1, ] <- data.frame("Vie", FALSE, 5)
str(df)
## 'data.frame':    21 obs. of  3 variables:
##  $ Dia     : Factor w/ 7 levels "Dom","Jue","Lun",..: 6 1 5 7 4 4 7 3 6 5 ...
##  $ Estimado: logi  TRUE FALSE TRUE FALSE TRUE FALSE ...
##  $ Lectura : num  5.55 5.01 4.63 6.23 5.12 ...
tail(df)
##    Dia Estimado  Lectura
## 16 Sab    FALSE 4.146496
## 17 Dom     TRUE 4.019923
## 18 Dom    FALSE 5.710751
## 19 Dom     TRUE 4.252564
## 20 Sab    FALSE 2.435578
## 21 Vie    FALSE 5.000000
# Correct approach 2: rbind() with a one-row data frame whose columns have
# the same names and types.
df <- rbind(
  df,
  data.frame(Dia = fdias[1], Estimado = TRUE, Lectura = 3.1415926)
)
str(df)
## 'data.frame':    22 obs. of  3 variables:
##  $ Dia     : Factor w/ 7 levels "Dom","Jue","Lun",..: 6 1 5 7 4 4 7 3 6 5 ...
##  $ Estimado: logi  TRUE FALSE TRUE FALSE TRUE FALSE ...
##  $ Lectura : num  5.55 5.01 4.63 6.23 5.12 ...
tail(df)
##    Dia Estimado  Lectura
## 17 Dom     TRUE 4.019923
## 18 Dom    FALSE 5.710751
## 19 Dom     TRUE 4.252564
## 20 Sab    FALSE 2.435578
## 21 Vie    FALSE 5.000000
## 22 Sab     TRUE 3.141593

Inserción de filas

# Insert a row at position 10 by binding three pieces: rows 1-9, the new row
# and the remaining rows. The old row "10" is relabelled "101" so that row
# names stay unique.
nuevaFila <- data.frame(Dia = fdias[1], Estimado = FALSE, Lectura = 4242)
df <- rbind(df[1:9, ], nuevaFila, df[10:nrow(df), ])
df[8:14, ]
##     Dia Estimado     Lectura
## 8   Lun    FALSE    3.585128
## 9   Sab     TRUE    5.352827
## 10  Sab    FALSE 4242.000000
## 101 Mie    FALSE    5.650393
## 11  Sab     TRUE    5.548925
## 12  Lun    FALSE    6.718259
## 13  Vie     TRUE    4.782671

Adición de nuevas columnas

# New column by assignment to a new name; Lectura is numeric, so no
# conversion is required.
df$Ajustado <- df$Lectura + rnorm(nrow(df), 2)
# New column through cbind(); the single string returned by date() is
# recycled to every row.
df <- cbind(df, Fecha = date())
head(df)
##   Dia Estimado  Lectura Ajustado                    Fecha
## 1 Sab     TRUE 5.549506 6.642203 Thu Oct  6 22:47:36 2022
## 2 Dom    FALSE 5.008355 6.534399 Thu Oct  6 22:47:36 2022
## 3 Mie     TRUE 4.626258 7.839938 Thu Oct  6 22:47:36 2022
## 4 Vie    FALSE 6.232892 8.247297 Thu Oct  6 22:47:36 2022
## 5 Mar     TRUE 5.117368 5.890669 Thu Oct  6 22:47:36 2022
## 6 Mar    FALSE 5.289064 7.139080 Thu Oct  6 22:47:36 2022

Inserción de columnas

# Insert a column between existing ones by binding the pieces in the desired
# order. The last piece is named explicitly; an unnamed df$Estimado argument
# would produce a column literally called "df$Estimado".
head(cbind(
  df[, c(1, 3)],
  Ajustado = df$Lectura + rnorm(nrow(df), 2),
  Estimado = df$Estimado
))
##   Dia  Lectura  Ajustado Estimado
## 1 Sab 5.549506  6.782196     TRUE
## 2 Dom 5.008355  6.931451    FALSE
## 3 Mie 4.626258  5.797437     TRUE
## 4 Vie 6.232892 10.165527    FALSE
## 5 Mar 5.117368  7.359127     TRUE
## 6 Mar 5.289064  7.014919    FALSE

Nombres de filas y columnas

# Column names of the data frame.
names(df)
## [1] "Dia"      "Estimado" "Lectura"  "Ajustado" "Fecha"
# For a data frame, colnames() reports the same vector as names().
colnames(df)
## [1] "Dia"      "Estimado" "Lectura"  "Ajustado" "Fecha"
# Row labels are returned as character strings and are not necessarily
# consecutive (note the "101" label between "10" and "11").
rownames(df)
##  [1] "1"   "2"   "3"   "4"   "5"   "6"   "7"   "8"   "9"   "10"  "101" "11" 
## [13] "12"  "13"  "14"  "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"

Data frames y la escalabilidad

# Scalable way to fill a data frame row by row: allocate every row up front
# and then assign by index, instead of growing the object on each iteration.
n <- 15
df <- data.frame(
  Lectura = numeric(n),
  Fecha = character(n),
  stringsAsFactors = FALSE
)
# seq_len(n) is empty when n is 0, whereas 1:n would iterate over c(1, 0).
for (idx in seq_len(n)) {
  df$Lectura[idx] <- rnorm(1, 10)
  df$Fecha[idx] <- as.character(Sys.Date())
}
head(df)
##     Lectura      Fecha
## 1  9.977593 2022-10-06
## 2 10.142381 2022-10-06
## 3  8.593626 2022-10-06
## 4  9.007072 2022-10-06
## 5  8.832328 2022-10-06
## 6  9.421968 2022-10-06

Listas

Creación de una lista

# 5 x 7 matrix filled column-wise with 1..35; rows are labelled by week and
# columns by day name.
mes <- matrix(
  1:35,
  ncol = 7,
  dimnames = list(paste0("Semana", 1:5), dias)
)
# A list can hold elements of different types ...
lst1 <- list(3.1415927, "Hola", TRUE, fdias[4])
# ... including whole structures: a factor, a matrix and a data frame.
lst2 <- list(fdias[1:10], mes, df)
length(lst1)
## [1] 4
lst1
## [[1]]
## [1] 3.141593
## 
## [[2]]
## [1] "Hola"
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] Vie
## Levels: Dom Jue Lun Mar Mie Sab Vie
length(lst2)
## [1] 3
lst2
## [[1]]
##  [1] Sab Dom Mie Vie Mar Mar Vie Lun Sab Mie
## Levels: Dom Jue Lun Mar Mie Sab Vie
## 
## [[2]]
##         Lun Mar Mie Jue Vie Sab Dom
## Semana1   1   6  11  16  21  26  31
## Semana2   2   7  12  17  22  27  32
## Semana3   3   8  13  18  23  28  33
## Semana4   4   9  14  19  24  29  34
## Semana5   5  10  15  20  25  30  35
## 
## [[3]]
##      Lectura      Fecha
## 1   9.977593 2022-10-06
## 2  10.142381 2022-10-06
## 3   8.593626 2022-10-06
## 4   9.007072 2022-10-06
## 5   8.832328 2022-10-06
## 6   9.421968 2022-10-06
## 7  10.797022 2022-10-06
## 8   8.948889 2022-10-06
## 9  12.156542 2022-10-06
## 10  9.287533 2022-10-06
## 11  9.833424 2022-10-06
## 12 11.894981 2022-10-06
## 13 11.591196 2022-10-06
## 14  8.632354 2022-10-06
## 15  9.011696 2022-10-06

Acceso a los elementos de una lista

lst1[2] # Single brackets: a list holding just the second element
## [[1]]
## [1] "Hola"
lst1[[2]] # Double brackets: the second element itself
## [1] "Hola"
lst1[2:3] # Sub-list with the second and third elements
## [[1]]
## [1] "Hola"
## 
## [[2]]
## [1] TRUE
lst2[[3]][1] # First column of the data frame stored as third element
##      Lectura
## 1   9.977593
## 2  10.142381
## 3   8.593626
## 4   9.007072
## 5   8.832328
## 6   9.421968
## 7  10.797022
## 8   8.948889
## 9  12.156542
## 10  9.287533
## 11  9.833424
## 12 11.894981
## 13 11.591196
## 14  8.632354
## 15  9.011696
# Flatten every element of lst2 into a single named vector. The contents mix
# types, so all values are shown as character strings; the factor appears as
# its integer level codes.
unlist(lst2)
##                                                                             
##                "6"                "1"                "5"                "7" 
##                                                                             
##                "4"                "4"                "7"                "3" 
##                                                                             
##                "6"                "5"                "1"                "2" 
##                                                                             
##                "3"                "4"                "5"                "6" 
##                                                                             
##                "7"                "8"                "9"               "10" 
##                                                                             
##               "11"               "12"               "13"               "14" 
##                                                                             
##               "15"               "16"               "17"               "18" 
##                                                                             
##               "19"               "20"               "21"               "22" 
##                                                                             
##               "23"               "24"               "25"               "26" 
##                                                                             
##               "27"               "28"               "29"               "30" 
##                                                                             
##               "31"               "32"               "33"               "34" 
##                              Lectura1           Lectura2           Lectura3 
##               "35"   "9.977593111156" "10.1423808332646" "8.59362586998506" 
##           Lectura4           Lectura5           Lectura6           Lectura7 
## "9.00707206596167" "8.83232825237973"    "9.42196812252" "10.7970223740259" 
##           Lectura8           Lectura9          Lectura10          Lectura11 
## "8.94888945389024" "12.1565422284014"  "9.2875326704231" "9.83342441171811" 
##          Lectura12          Lectura13          Lectura14          Lectura15 
## "11.8949810058114" "11.5911961436968" "8.63235373261366" "9.01169649707222" 
##             Fecha1             Fecha2             Fecha3             Fecha4 
##       "2022-10-06"       "2022-10-06"       "2022-10-06"       "2022-10-06" 
##             Fecha5             Fecha6             Fecha7             Fecha8 
##       "2022-10-06"       "2022-10-06"       "2022-10-06"       "2022-10-06" 
##             Fecha9            Fecha10            Fecha11            Fecha12 
##       "2022-10-06"       "2022-10-06"       "2022-10-06"       "2022-10-06" 
##            Fecha13            Fecha14            Fecha15 
##       "2022-10-06"       "2022-10-06"       "2022-10-06"
# Flatten only one level: the result is still a list, with the factor and the
# matrix split into single values while the data frame columns ($Lectura,
# $Fecha) are kept as whole vectors.
unlist(lst2, recursive = FALSE)
## [[1]]
## [1] 6
## 
## [[2]]
## [1] 1
## 
## [[3]]
## [1] 5
## 
## [[4]]
## [1] 7
## 
## [[5]]
## [1] 4
## 
## [[6]]
## [1] 4
## 
## [[7]]
## [1] 7
## 
## [[8]]
## [1] 3
## 
## [[9]]
## [1] 6
## 
## [[10]]
## [1] 5
## 
## [[11]]
## [1] 1
## 
## [[12]]
## [1] 2
## 
## [[13]]
## [1] 3
## 
## [[14]]
## [1] 4
## 
## [[15]]
## [1] 5
## 
## [[16]]
## [1] 6
## 
## [[17]]
## [1] 7
## 
## [[18]]
## [1] 8
## 
## [[19]]
## [1] 9
## 
## [[20]]
## [1] 10
## 
## [[21]]
## [1] 11
## 
## [[22]]
## [1] 12
## 
## [[23]]
## [1] 13
## 
## [[24]]
## [1] 14
## 
## [[25]]
## [1] 15
## 
## [[26]]
## [1] 16
## 
## [[27]]
## [1] 17
## 
## [[28]]
## [1] 18
## 
## [[29]]
## [1] 19
## 
## [[30]]
## [1] 20
## 
## [[31]]
## [1] 21
## 
## [[32]]
## [1] 22
## 
## [[33]]
## [1] 23
## 
## [[34]]
## [1] 24
## 
## [[35]]
## [1] 25
## 
## [[36]]
## [1] 26
## 
## [[37]]
## [1] 27
## 
## [[38]]
## [1] 28
## 
## [[39]]
## [1] 29
## 
## [[40]]
## [1] 30
## 
## [[41]]
## [1] 31
## 
## [[42]]
## [1] 32
## 
## [[43]]
## [1] 33
## 
## [[44]]
## [1] 34
## 
## [[45]]
## [1] 35
## 
## $Lectura
##  [1]  9.977593 10.142381  8.593626  9.007072  8.832328  9.421968 10.797022
##  [8]  8.948889 12.156542  9.287533  9.833424 11.894981 11.591196  8.632354
## [15]  9.011696
## 
## $Fecha
##  [1] "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06"
##  [6] "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06"
## [11] "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06"

Asignación de nombres a los elementos

# Attach a name to each of the four elements of the list.
lst1 <- setNames(lst1, c("PI", "Mensaje", "Activado", "Inicio"))
# The first element can now be reached in three equivalent ways:
lst1[[1]] # by position
## [1] 3.141593
lst1[["PI"]] # by name, with double brackets
## [1] 3.141593
lst1$PI # by name, with the $ operator
## [1] 3.141593