Data frames
Creación de un data frame
# Build the sample data used throughout: a pool of 1000 random weekday labels
# turned into a factor, and a 20-row data frame with a weekday factor, an
# alternating logical flag and a normally distributed reading (mean 5).
dias <- c("Lun", "Mar", "Mie", "Jue", "Vie", "Sab", "Dom")
# runif(1000, 0, 7) + 1 lies between 1 and 8, so truncating it with
# as.integer() gives valid indices 1..7 into dias.
mdias <- dias[as.integer(runif(1000, 0, 7) + 1)]
fdias <- factor(mdias)
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
df <- data.frame(
  Dia = fdias[1:20],
  Estimado = rep(c(TRUE, FALSE), 10),
  Lectura = rnorm(20, 5)
)
head(df)
## Dia Estimado Lectura
## 1 Sab TRUE 5.549506
## 2 Dom FALSE 5.008355
## 3 Mie TRUE 4.626258
## 4 Vie FALSE 6.232892
## 5 Mar TRUE 5.117368
## 6 Mar FALSE 5.289064
c(length(df), ncol(df), nrow(df))
## [1] 3 3 20
df2 <- data.frame(Dia = numeric(), Estimado = logical(), Lectura = numeric())
df2
## [1] Dia Estimado Lectura
## <0 rows> (or 0-length row.names)
df3 <- data.frame(Dia = numeric(10), Estimado = logical(10), Lectura = numeric(10))
df3
## Dia Estimado Lectura
## 1 0 FALSE 0
## 2 0 FALSE 0
## 3 0 FALSE 0
## 4 0 FALSE 0
## 5 0 FALSE 0
## 6 0 FALSE 0
## 7 0 FALSE 0
## 8 0 FALSE 0
## 9 0 FALSE 0
## 10 0 FALSE 0
Acceder al contenido de un data frame
df[5,3] # Tercera columna de la quinta fila
## [1] 5.117368
df[5,] # Quinta fila completa
## Dia Estimado Lectura
## 5 Mar TRUE 5.117368
df[,3] # Tercera columna completa
## [1] 5.549506 5.008355 4.626258 6.232892 5.117368 5.289064 5.705650 3.585128
## [9] 5.352827 5.650393 5.548925 6.718259 4.782671 5.347531 4.107657 4.146496
## [17] 4.019923 5.710751 4.252564 2.435578
df[c(-3,-6),] # Todo menos filas 3 y 6
## Dia Estimado Lectura
## 1 Sab TRUE 5.549506
## 2 Dom FALSE 5.008355
## 4 Vie FALSE 6.232892
## 5 Mar TRUE 5.117368
## 7 Vie TRUE 5.705650
## 8 Lun FALSE 3.585128
## 9 Sab TRUE 5.352827
## 10 Mie FALSE 5.650393
## 11 Sab TRUE 5.548925
## 12 Lun FALSE 6.718259
## 13 Vie TRUE 4.782671
## 14 Dom FALSE 5.347531
## 15 Dom TRUE 4.107657
## 16 Sab FALSE 4.146496
## 17 Dom TRUE 4.019923
## 18 Dom FALSE 5.710751
## 19 Dom TRUE 4.252564
## 20 Sab FALSE 2.435578
df$Lectura
## [1] 5.549506 5.008355 4.626258 6.232892 5.117368 5.289064 5.705650 3.585128
## [9] 5.352827 5.650393 5.548925 6.718259 4.782671 5.347531 4.107657 4.146496
## [17] 4.019923 5.710751 4.252564 2.435578
df$Estimado==F
## [1] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE
## [13] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE
# Obtener el día y lectura de todas las filas en las que no se haya estimado
df[df$Estimado == F, c('Dia','Lectura')]
## Dia Lectura
## 2 Dom 5.008355
## 4 Vie 6.232892
## 6 Mar 5.289064
## 8 Lun 3.585128
## 10 Mie 5.650393
## 12 Lun 6.718259
## 14 Dom 5.347531
## 16 Sab 4.146496
## 18 Dom 5.710751
## 20 Sab 2.435578
# Descartar también las filas cuya lectura sea <= 3
df[df$Estimado == F & df$Lectura > 3, c('Dia','Lectura')]
## Dia Lectura
## 2 Dom 5.008355
## 4 Vie 6.232892
## 6 Mar 5.289064
## 8 Lun 3.585128
## 10 Mie 5.650393
## 12 Lun 6.718259
## 14 Dom 5.347531
## 16 Sab 4.146496
## 18 Dom 5.710751
df[15,1] <- 'Vie' # Acceso al mismo dato usando
df$Dia[15] <- 'Vie' # dos notaciones distintas
df[12:17,]
## Dia Estimado Lectura
## 12 Lun FALSE 6.718259
## 13 Vie TRUE 4.782671
## 14 Dom FALSE 5.347531
## 15 Vie TRUE 4.107657
## 16 Sab FALSE 4.146496
## 17 Dom TRUE 4.019923
Agregar filas y columnas a un data frame
Adición de nuevas filas
str(df)
## 'data.frame': 20 obs. of 3 variables:
## $ Dia : Factor w/ 7 levels "Dom","Jue","Lun",..: 6 1 5 7 4 4 7 3 6 5 ...
## $ Estimado: logi TRUE FALSE TRUE FALSE TRUE FALSE ...
## $ Lectura : num 5.55 5.01 4.63 6.23 5.12 ...
# Cuidado: c() convierte todos los valores a character, por lo que las columnas Estimado y Lectura pierden su tipo y pasan a ser character
df[nrow(df)+1,] <- c('Vie', FALSE, 5)
str(df)
## 'data.frame': 21 obs. of 3 variables:
## $ Dia : Factor w/ 7 levels "Dom","Jue","Lun",..: 6 1 5 7 4 4 7 3 6 5 ...
## $ Estimado: chr "TRUE" "FALSE" "TRUE" "FALSE" ...
## $ Lectura : chr "5.54950574821885" "5.00835464449159" "4.62625769663325" "6.23289217295424" ...
# Drop the row appended in the previous step so the row count matches the
# exercise. seq_len() is used because 1:(n - 1) would count backwards as
# c(1, 0) if only one row were left.
df <- df[seq_len(nrow(df) - 1), ]
# Append the new row as a one-row data frame rather than a c() vector, so each
# value keeps its own type. Estimado and Lectura are still reported as chr in
# the str() output that follows because the earlier c() assignment had already
# converted those columns.
df[nrow(df) + 1, ] <- data.frame("Vie", FALSE, 5)
str(df)
## 'data.frame': 21 obs. of 3 variables:
## $ Dia : Factor w/ 7 levels "Dom","Jue","Lun",..: 6 1 5 7 4 4 7 3 6 5 ...
## $ Estimado: chr "TRUE" "FALSE" "TRUE" "FALSE" ...
## $ Lectura : chr "5.54950574821885" "5.00835464449159" "4.62625769663325" "6.23289217295424" ...
tail(df)
## Dia Estimado Lectura
## 16 Sab FALSE 4.14649593547535
## 17 Dom TRUE 4.01992251868824
## 18 Dom FALSE 5.71075062575064
## 19 Dom TRUE 4.25256435041239
## 20 Sab FALSE 2.43557799676781
## 21 Vie FALSE 5
# Append one more row with rbind(); the new one-row data frame uses the same
# column names as df.
df <- rbind(
  df,
  data.frame(Dia = fdias[1], Estimado = TRUE, Lectura = 3.1415926)
)
str(df)
## 'data.frame': 22 obs. of 3 variables:
## $ Dia : Factor w/ 7 levels "Dom","Jue","Lun",..: 6 1 5 7 4 4 7 3 6 5 ...
## $ Estimado: chr "TRUE" "FALSE" "TRUE" "FALSE" ...
## $ Lectura : chr "5.54950574821885" "5.00835464449159" "4.62625769663325" "6.23289217295424" ...
tail(df)
## Dia Estimado Lectura
## 17 Dom TRUE 4.01992251868824
## 18 Dom FALSE 5.71075062575064
## 19 Dom TRUE 4.25256435041239
## 20 Sab FALSE 2.43557799676781
## 21 Vie FALSE 5
## 22 Sab TRUE 3.1415926
Inserción de filas
# Insert a row after position 9 by stitching together the first nine rows, the
# new row and the remaining rows. In the output that follows, the inserted row
# is labelled "10" and the row previously labelled "10" shows up as "101".
nuevaFila <- data.frame(Dia = fdias[1], Estimado = FALSE, Lectura = 4242)
df <- rbind(df[1:9, ], nuevaFila, df[10:nrow(df), ])
df[8:14,]
## Dia Estimado Lectura
## 8 Lun FALSE 3.58512835970337
## 9 Sab TRUE 5.35282743999013
## 10 Sab FALSE 4242
## 101 Mie FALSE 5.65039293595447
## 11 Sab TRUE 5.54892455974784
## 12 Lun FALSE 6.71825915891035
## 13 Vie TRUE 4.78267115333993
Adición de nuevas columnas
df$Ajustado <- as.numeric(df$Lectura) + rnorm(nrow(df), 2) #conversión de columna Lectura char a numeric
df <- cbind(df, Fecha = date())
head(df)
## Dia Estimado Lectura Ajustado Fecha
## 1 Sab TRUE 5.54950574821885 6.642203 Thu Oct 6 22:47:36 2022
## 2 Dom FALSE 5.00835464449159 6.534399 Thu Oct 6 22:47:36 2022
## 3 Mie TRUE 4.62625769663325 7.839938 Thu Oct 6 22:47:36 2022
## 4 Vie FALSE 6.23289217295424 8.247297 Thu Oct 6 22:47:36 2022
## 5 Mar TRUE 5.11736844920342 5.890669 Thu Oct 6 22:47:36 2022
## 6 Mar FALSE 5.28906440674503 7.139080 Thu Oct 6 22:47:36 2022
Inserción de columnas
head(cbind(df[,c(1,3)], Ajustado = as.numeric(df$Lectura) + rnorm(nrow(df),2), df$Estimado)) #conversión de columna Lectura char a numeric
## Dia Lectura Ajustado df$Estimado
## 1 Sab 5.54950574821885 6.782196 TRUE
## 2 Dom 5.00835464449159 6.931451 FALSE
## 3 Mie 4.62625769663325 5.797437 TRUE
## 4 Vie 6.23289217295424 10.165527 FALSE
## 5 Mar 5.11736844920342 7.359127 TRUE
## 6 Mar 5.28906440674503 7.014919 FALSE
Nombres de filas y columnas
names(df)
## [1] "Dia" "Estimado" "Lectura" "Ajustado" "Fecha"
colnames(df)
## [1] "Dia" "Estimado" "Lectura" "Ajustado" "Fecha"
rownames(df)
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "101" "11"
## [13] "12" "13" "14" "15" "16" "17" "18" "19" "20" "21" "22"
Data frames y la escalabilidad
# Preallocate both columns at their final length and then fill them in by
# index, so the data frame never has to grow inside the loop.
n <- 15
df <- data.frame(
  Lectura = numeric(n),
  Fecha = character(n),
  stringsAsFactors = FALSE
)
# seq_len(n) yields an empty sequence when n is 0, whereas 1:n would give
# c(1, 0) and run the body twice with invalid indices.
for (idx in seq_len(n)) {
  df$Lectura[idx] <- rnorm(1, 10)
  df$Fecha[idx] <- as.character(Sys.Date())
}
head(df)
## Lectura Fecha
## 1 9.977593 2022-10-06
## 2 10.142381 2022-10-06
## 3 8.593626 2022-10-06
## 4 9.007072 2022-10-06
## 5 8.832328 2022-10-06
## 6 9.421968 2022-10-06
Listas
Creación de una lista
# A 5 x 7 matrix holding 1..35, filled column by column, with one row per week
# and one column per weekday label taken from dias.
mes <- matrix(
  1:35,
  ncol = 7,
  dimnames = list(
    c("Semana1", "Semana2", "Semana3", "Semana4", "Semana5"),
    dias
  )
)
# lst1 mixes single values of different types; lst2 holds whole objects
# (a factor slice, the matrix above and the current data frame).
lst1 <- list(3.1415927, "Hola", TRUE, fdias[4])
lst2 <- list(fdias[1:10], mes, df)
length(lst1)
## [1] 4
lst1
## [[1]]
## [1] 3.141593
##
## [[2]]
## [1] "Hola"
##
## [[3]]
## [1] TRUE
##
## [[4]]
## [1] Vie
## Levels: Dom Jue Lun Mar Mie Sab Vie
length(lst2)
## [1] 3
lst2
## [[1]]
## [1] Sab Dom Mie Vie Mar Mar Vie Lun Sab Mie
## Levels: Dom Jue Lun Mar Mie Sab Vie
##
## [[2]]
## Lun Mar Mie Jue Vie Sab Dom
## Semana1 1 6 11 16 21 26 31
## Semana2 2 7 12 17 22 27 32
## Semana3 3 8 13 18 23 28 33
## Semana4 4 9 14 19 24 29 34
## Semana5 5 10 15 20 25 30 35
##
## [[3]]
## Lectura Fecha
## 1 9.977593 2022-10-06
## 2 10.142381 2022-10-06
## 3 8.593626 2022-10-06
## 4 9.007072 2022-10-06
## 5 8.832328 2022-10-06
## 6 9.421968 2022-10-06
## 7 10.797022 2022-10-06
## 8 8.948889 2022-10-06
## 9 12.156542 2022-10-06
## 10 9.287533 2022-10-06
## 11 9.833424 2022-10-06
## 12 11.894981 2022-10-06
## 13 11.591196 2022-10-06
## 14 8.632354 2022-10-06
## 15 9.011696 2022-10-06
Acceso a los elementos de una lista
lst1[2]
## [[1]]
## [1] "Hola"
lst1[[2]]
## [1] "Hola"
lst1[c(2,3)]
## [[1]]
## [1] "Hola"
##
## [[2]]
## [1] TRUE
lst2[[3]][1]
## Lectura
## 1 9.977593
## 2 10.142381
## 3 8.593626
## 4 9.007072
## 5 8.832328
## 6 9.421968
## 7 10.797022
## 8 8.948889
## 9 12.156542
## 10 9.287533
## 11 9.833424
## 12 11.894981
## 13 11.591196
## 14 8.632354
## 15 9.011696
unlist(lst2)
##
## "6" "1" "5" "7"
##
## "4" "4" "7" "3"
##
## "6" "5" "1" "2"
##
## "3" "4" "5" "6"
##
## "7" "8" "9" "10"
##
## "11" "12" "13" "14"
##
## "15" "16" "17" "18"
##
## "19" "20" "21" "22"
##
## "23" "24" "25" "26"
##
## "27" "28" "29" "30"
##
## "31" "32" "33" "34"
## Lectura1 Lectura2 Lectura3
## "35" "9.977593111156" "10.1423808332646" "8.59362586998506"
## Lectura4 Lectura5 Lectura6 Lectura7
## "9.00707206596167" "8.83232825237973" "9.42196812252" "10.7970223740259"
## Lectura8 Lectura9 Lectura10 Lectura11
## "8.94888945389024" "12.1565422284014" "9.2875326704231" "9.83342441171811"
## Lectura12 Lectura13 Lectura14 Lectura15
## "11.8949810058114" "11.5911961436968" "8.63235373261366" "9.01169649707222"
## Fecha1 Fecha2 Fecha3 Fecha4
## "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06"
## Fecha5 Fecha6 Fecha7 Fecha8
## "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06"
## Fecha9 Fecha10 Fecha11 Fecha12
## "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06"
## Fecha13 Fecha14 Fecha15
## "2022-10-06" "2022-10-06" "2022-10-06"
unlist(lst2, recursive = FALSE)
## [[1]]
## [1] 6
##
## [[2]]
## [1] 1
##
## [[3]]
## [1] 5
##
## [[4]]
## [1] 7
##
## [[5]]
## [1] 4
##
## [[6]]
## [1] 4
##
## [[7]]
## [1] 7
##
## [[8]]
## [1] 3
##
## [[9]]
## [1] 6
##
## [[10]]
## [1] 5
##
## [[11]]
## [1] 1
##
## [[12]]
## [1] 2
##
## [[13]]
## [1] 3
##
## [[14]]
## [1] 4
##
## [[15]]
## [1] 5
##
## [[16]]
## [1] 6
##
## [[17]]
## [1] 7
##
## [[18]]
## [1] 8
##
## [[19]]
## [1] 9
##
## [[20]]
## [1] 10
##
## [[21]]
## [1] 11
##
## [[22]]
## [1] 12
##
## [[23]]
## [1] 13
##
## [[24]]
## [1] 14
##
## [[25]]
## [1] 15
##
## [[26]]
## [1] 16
##
## [[27]]
## [1] 17
##
## [[28]]
## [1] 18
##
## [[29]]
## [1] 19
##
## [[30]]
## [1] 20
##
## [[31]]
## [1] 21
##
## [[32]]
## [1] 22
##
## [[33]]
## [1] 23
##
## [[34]]
## [1] 24
##
## [[35]]
## [1] 25
##
## [[36]]
## [1] 26
##
## [[37]]
## [1] 27
##
## [[38]]
## [1] 28
##
## [[39]]
## [1] 29
##
## [[40]]
## [1] 30
##
## [[41]]
## [1] 31
##
## [[42]]
## [1] 32
##
## [[43]]
## [1] 33
##
## [[44]]
## [1] 34
##
## [[45]]
## [1] 35
##
## $Lectura
## [1] 9.977593 10.142381 8.593626 9.007072 8.832328 9.421968 10.797022
## [8] 8.948889 12.156542 9.287533 9.833424 11.894981 11.591196 8.632354
## [15] 9.011696
##
## $Fecha
## [1] "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06"
## [6] "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06"
## [11] "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06" "2022-10-06"
Asignación de nombres a los elementos
names(lst1) <- c('PI','Mensaje','Activado','Inicio')
lst1[[1]]
## [1] 3.141593
lst1[['PI']]
## [1] 3.141593
lst1$PI
## [1] 3.141593