# Basic data types ----
# Lines starting with `##` show the console output of the preceding expression.

# Numeric (double): a single value and a vector.
numero <- 34.8
numero
## [1] 34.8
class(numero)
## [1] "numeric"

vector_numeros <- c(10.2, 20.5, 30.8)
vector_numeros
## [1] 10.2 20.5 30.8
class(vector_numeros)
## [1] "numeric"

# A whole number written without the `L` suffix is still stored as a double.
enteros1 <- 5
class(enteros1)
## [1] "numeric"

# The `L` suffix creates a true integer.
enteros2 <- 5L
class(enteros2)
## [1] "integer"

vector_enteros <- c(1L, 2L, 3L)
class(vector_enteros)
## [1] "integer"

# Character (text).
texto <- "universidad"
class(texto)
## [1] "character"

vector_texto <- c("a", "b", "c")
class(vector_texto)
## [1] "character"

# Factor without explicit levels: the levels come out in alphabetical order,
# which is not the calendar order of the months.
vector_mes_malo <- factor(x = c("enero", "febrero", "marzo", "abril", "enero"))
class(vector_mes_malo)
## [1] "factor"
levels(vector_mes_malo)
## [1] "abril" "enero" "febrero" "marzo"

# Factor with explicit levels: the order given in `levels` is kept.
vector_mes_bueno <- factor(
  x = c("enero", "febrero", "marzo", "abril", "enero"),
  levels = c("enero", "febrero", "marzo", "abril")
)
levels(vector_mes_bueno)
## [1] "enero" "febrero" "marzo" "abril"

# Logical.
logico <- TRUE
class(logico)
## [1] "logical"

vector_logicos <- rep(c(TRUE, FALSE), times = 2)
class(vector_logicos)
## [1] "logical"

# Missing values: a bare NA is logical, but inside a numeric vector it takes
# the type of the vector.
valor_ausente <- NA
class(valor_ausente)
## [1] "logical"

vector_ausente <- c(34.6, NA, 56.9)
class(vector_ausente)
## [1] "numeric"
# Operators ----
# Lines starting with `##` show the console output of the preceding expression.

# Arithmetic between a scalar and a vector is applied element by element.
vector_numeros <- c(3, 5, 10)
2 + vector_numeros
## [1] 5 7 12

# Two vectors of the same length are added position by position.
vector1 <- c(1, 3, 5)
vector2 <- c(10, 20, 30)
vector1 + vector2
## [1] 11 23 35

# Different lengths: the shorter vector is recycled. Because 3 is not a
# multiple of 2, R also emits a warning here; this is a deliberate example.
vector3 <- c(10, 20)
vector4 <- c(10, 20, 30)
vector3 + vector4
## [1] 20 40 40

# Powers.
2^2
## [1] 4
vector3^3
## [1] 1000 8000

# Equality.
50 == 40
## [1] FALSE
vector_ejemplo <- c(40, 20, 2)
2 == vector_ejemplo
## [1] FALSE FALSE TRUE

# Inequality (comparison of text is case-sensitive).
3 != 3
## [1] FALSE
texto <- c("A", "a", "b")
"a" != texto
## [1] TRUE FALSE TRUE

# Membership.
numeros2 <- c(10, 20, 30, 40)
30 %in% numeros2
## [1] TRUE
texto2 <- c("a", "b", "c", "A")
"A" %in% texto2
## [1] TRUE

# Greater than or equal.
ejemplo <- c(30, 40, 50, 10)
ejemplo >= 20
## [1] TRUE TRUE TRUE FALSE

# Negation: `!` binds more loosely than `==` and `%in%`, so it negates the
# result of the whole comparison.
!2 == 2 # 2 is not equal to 2
## [1] FALSE
!3 %in% c(2, 3, 5) # 3 does not belong to the vector
## [1] FALSE

# Element-wise OR on a single vector.
ejemplo2 <- c(1, 5, 10, 15, 20)
ejemplo2 > 10 | ejemplo2 < 5
## [1] TRUE FALSE FALSE TRUE TRUE

# Element-wise OR / AND across two vectors.
frutas1 <- c(80.5, 90, 50.4, 30)
frutas2 <- c(90, 30, 40, 20)
frutas1 > 80 | frutas2 > 80
## [1] TRUE TRUE FALSE FALSE
frutas1 > 80 & frutas2 > 80
## [1] TRUE FALSE FALSE FALSE

# `||` and `&&` work on single logical values only. Older R versions silently
# used just the first element of longer vectors; since R 4.3.0 passing a
# vector of length > 1 is an error. The first element is therefore selected
# explicitly, which gives the same result on every R version.
frutas1[1] > 80 || frutas2[1] > 80
## [1] TRUE
frutas1[1] > 80 && frutas2[1] > 80
## [1] TRUE
# Compact overview of the structure of the built-in `iris` data set.
utils::str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
library(tidyverse)## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Type coercion ----
# Lines starting with `##` show the console output of the preceding expression.

# Mixing numbers and text inside c() turns every element into character.
mezcla1 <- c(1, 20, "a")
class(mezcla1)
## [1] "character"
mezcla1
## [1] "1" "20" "a"

# Converting back to numeric: "a" cannot be parsed, so it becomes NA
# (R reports this with a coercion warning).
mezcla2 <- as.numeric(mezcla1)
class(mezcla2)
## [1] "numeric"
mezcla2
## [1] 1 20 NA

# Numbers stored as text convert cleanly when they use "." as decimal mark.
peso <- c("1.5", "2.50", "3.50", "4.5", "5")
class(peso)
## [1] "character"
peso2 <- as.numeric(peso)
peso2
## [1] 1.5 2.5 3.5 4.5 5.0
class(peso2)
## [1] "numeric"

# A comma as decimal mark is not understood by as.numeric(): the result is NA.
problema <- "2,2"
as.numeric(problema)
## [1] NA
# Vectors: length, names, sequences, repetition and pasting ----
# Lines starting with `##` show the console output of the preceding expression.

# Number of elements.
vector1 <- c(1, 4, 20, 50, 20)
length(vector1)
## [1] 5
vector1
## [1] 1 4 20 50 20

# Named vectors: names can be given at creation time ...
vector_nombrado <- c(elemento1 = 20, elemento2 = 50)
names(vector_nombrado)
## [1] "elemento1" "elemento2"
vector_nombrado
## elemento1 elemento2
## 20 50

# ... and replaced afterwards.
names(vector_nombrado) <- paste0("variable", 1:2)
vector_nombrado
## variable1 variable2
## 20 50

# Sequences: consecutive integers, fixed step, and fixed number of points.
secuencia1 <- 1:10
secuencia1
## [1] 1 2 3 4 5 6 7 8 9 10
secuencia2 <- seq(20, 50, by = 2)
secuencia2
## [1] 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48 50
seq(1, 20, length.out = 30)
## [1] 1.000000 1.655172 2.310345 2.965517 3.620690 4.275862 4.931034
## [8] 5.586207 6.241379 6.896552 7.551724 8.206897 8.862069 9.517241
## [15] 10.172414 10.827586 11.482759 12.137931 12.793103 13.448276 14.103448
## [22] 14.758621 15.413793 16.068966 16.724138 17.379310 18.034483 18.689655
## [29] 19.344828 20.000000

# Repetition: `times` repeats the whole vector, `each` repeats every element.
rep(1, times = 20)
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
rep("a", times = 5)
## [1] "a" "a" "a" "a" "a"
rep(c("A", "B", "C", "D"), times = 12)
## [1] "A" "B" "C" "D" "A" "B" "C" "D" "A" "B" "C" "D" "A" "B" "C" "D" "A" "B" "C"
## [20] "D" "A" "B" "C" "D" "A" "B" "C" "D" "A" "B" "C" "D" "A" "B" "C" "D" "A" "B"
## [39] "C" "D" "A" "B" "C" "D" "A" "B" "C" "D"
rep(c("T1", "T2"), each = 12)
## [1] "T1" "T1" "T1" "T1" "T1" "T1" "T1" "T1" "T1" "T1" "T1" "T1" "T2" "T2" "T2"
## [16] "T2" "T2" "T2" "T2" "T2" "T2" "T2" "T2" "T2"

# Pasting text: paste() inserts `sep` between pieces, paste0() uses no separator.
paste("T", 1, sep = "-")
## [1] "T-1"
mi_nombre <- "Edimer"
paste0("Hola ", mi_nombre, " ¿Cómo estás?")
## [1] "Hola Edimer ¿Cómo estás?"
paste0("T", 1:12)
## [1] "T1" "T2" "T3" "T4" "T5" "T6" "T7" "T8" "T9" "T10" "T11" "T12"
# Indexing vectors ----
# Lines starting with `##` show the console output of the preceding expression.

# By position: one element, or several at once.
vector5 <- c(10, 20, 30)
vector5[2]
## [1] 20
vector5[c(1, 3)]
## [1] 10 30

# By a range of positions.
vector6 <- c(1, 10, 20, 30, 40, 50)
vector6[3:6]
## [1] 20 30 40 50

# By name.
vector7 <- c(v1 = 3, v2 = 4, v3 = 5)
vector7["v2"]
## v2
## 4

# By condition: the comparison yields a logical vector used as a mask.
vector8 <- c(1, 2, 3, 6, 10, 50, 100, 500, 1000)
vector8[vector8 > 100]
## [1] 500 1000

# By an explicit logical mask.
vector9 <- c(10, 20)
vector9[c(FALSE, TRUE)]
## [1] 20

# Elements above the mean, and the complement (elements not above the mean).
promedio <- mean(vector8)
vector8[vector8 > promedio]
## [1] 500 1000
vector8[vector8 <= promedio]
## [1] 1 2 3 6 10 50 100

# A condition on one vector can select elements of another of equal length.
departamentos <- c("antioquia", "valle", "nariño", "caldas")
hurtos <- c(40, 50, 20, 15)
departamentos[hurtos > 20]
## [1] "antioquia" "valle"
library(stringr)
# Text handling with stringr ----
# Lines starting with `##` show the console output of the preceding expression.

# Joining strings.
mi_nombre <- "Edimer"
str_c("Hola ", mi_nombre, " ¿Cómo estás?")
## [1] "Hola Edimer ¿Cómo estás?"

# Joining is vectorised: elements are combined position by position.
deptos <- c("Antioquia", "Santander")
mpios <- c("Medellín", "Bucaramanga")
str_c(deptos, "-", mpios)
## [1] "Antioquia-Medellín" "Santander-Bucaramanga"

# Number of characters.
texto4 <- "Este es mi texto"
str_length(texto4)
## [1] 16
texto5 <- c("Mi texto", "antioquia", "cali")
str_length(texto5)
## [1] 8 9 4

# A number written with a decimal comma cannot be converted directly ...
numero_malo <- c("2,20")
as.numeric(numero_malo)
## [1] NA

# ... so the comma is replaced by a point first.
numero_bien <- str_replace_all(numero_malo, pattern = ",", replacement = ".")
as.numeric(numero_bien)
## [1] 2.2

# Sorting text, descending and ascending.
texto6 <- c("1Felipe", "3Ramiro", "4Mauricio", "1Dora", "2Paola")
str_sort(texto6, decreasing = TRUE)
## [1] "4Mauricio" "3Ramiro" "2Paola" "1Felipe" "1Dora"
str_sort(texto6, decreasing = FALSE) # the default
## [1] "1Dora" "1Felipe" "2Paola" "3Ramiro" "4Mauricio"

# Changing case: lower, upper, sentence and title.
texto7 <- c("EDIMER", "UNIVERSIDAD", "R")
str_to_lower(texto7)
## [1] "edimer" "universidad" "r"
texto8 <- c("Perro", "perro", "PERRO")
str_to_lower(texto8)
## [1] "perro" "perro" "perro"
texto9 <- c("antioquia", "santander", "cundinamarca")
str_to_upper(texto9)
## [1] "ANTIOQUIA" "SANTANDER" "CUNDINAMARCA"
texto10 <- c("ANTIOQUIA", "valle del cauca")
str_to_sentence(texto10)
## [1] "Antioquia" "Valle del cauca"
str_to_title(texto10)
## [1] "Antioquia" "Valle Del Cauca"

# Removing leading and trailing whitespace.
texto12 <- c("Raza A", "Raza A ", " Raza A")
str_trim(texto12)
## [1] "Raza A" "Raza A" "Raza A"
# Matrices ----
# Lines starting with `##` show the console output of the preceding expression.

# Without dimensions the data become a single column.
matriz1 <- matrix(data = c(2, 3, 4, 5))
matriz1
## [,1]
## [1,] 2
## [2,] 3
## [3,] 4
## [4,] 5

# With dimensions the values are filled column by column ...
matriz2 <- matrix(data = c(10, 20, 30, 40), nrow = 2, ncol = 2)
matriz2
## [,1] [,2]
## [1,] 10 30
## [2,] 20 40

# ... unless `byrow = TRUE` asks for row-by-row filling.
matriz3 <- matrix(
  data = c(100, 200, 300, 400),
  nrow = 2,
  ncol = 2,
  byrow = TRUE
)
matriz3
## [,1] [,2]
## [1,] 100 200
## [2,] 300 400

# Dimensions, number of rows and number of columns.
dim(matriz3)
## [1] 2 2
dim(matriz1)
## [1] 4 1
dim(iris) # on a data set
## [1] 150 5
nrow(matriz1)
## [1] 4
nrow(iris) # on a data set
## [1] 150
ncol(matriz1)
## [1] 1
ncol(iris) # on a data set
## [1] 5

# Indexing with [row, column]; an empty position means "all".
matriz4 <- matrix(data = c(1, 5, 50, 500, 200, 1000), nrow = 3, ncol = 2)
matriz4
## [,1] [,2]
## [1,] 1 500
## [2,] 5 200
## [3,] 50 1000
matriz4[, 1] # first column
## [1] 1 5 50
matriz4[1, ] # first row
## [1] 1 500
matriz4[c(1, 3), 2] # we want the 500 and the 1000
## [1] 500 1000

# Arithmetic with a scalar is applied to every cell.
matriz5 <- matrix(data = c(10, 20, 30, 40), ncol = 2)
matriz5 + 1 # addition with a scalar
## [,1] [,2]
## [1,] 11 31
## [2,] 21 41
matriz5 * 5 # multiplication by a scalar
## [,1] [,2]
## [1,] 50 150
## [2,] 100 200

# Matrix algebra.
matriz6 <- matrix(data = c(2, 4, 6, 8), ncol = 2)
matriz6
## [,1] [,2]
## [1,] 2 6
## [2,] 4 8
matriz5 %*% matriz6 # matrix multiplication
## [,1] [,2]
## [1,] 140 300
## [2,] 200 440
diag(matriz5) # diagonal
## [1] 10 40
solve(matriz5) # inverse
## [,1] [,2]
## [1,] -0.2 0.15
## [2,] 0.1 -0.05
# Lists ----
# Lines starting with `##` show the console output of the preceding expression.

# A list can hold elements of different types.
lista1 <- list(1, "A", TRUE)
lista1
## [[1]]
## [1] 1
##
## [[2]]
## [1] "A"
##
## [[3]]
## [1] TRUE

# Elements can be named, and each element can itself be a vector.
lista2 <- list(
  p1 = "Universidad",
  p2 = 1234,
  p3 = c(TRUE, FALSE)
)
lista2
## $p1
## [1] "Universidad"
##
## $p2
## [1] 1234
##
## $p3
## [1] TRUE FALSE

# `$` extracts one element by name.
lista2$p3
## [1] TRUE FALSE
class(lista2)
## [1] "list"
class(lista2$p3)
## [1] "logical"

# `[` returns a (sub)list, `[[` returns the element itself.
lista2[3]
## $p3
## [1] TRUE FALSE
lista2[[3]]
## [1] TRUE FALSE

# Indexing inside an extracted element.
lista2$p3[2]
## [1] FALSE
# Adding elements to vectors, matrices and lists ----
# Lines starting with `##` show the console output of the preceding expression.

# Vector: assigning to the position right after the last one appends a value.
vector8 <- c(1, 20, 50)
vector8[length(vector8) + 1] <- 100
vector8
## [1] 1 20 50 100

matriz7 <- matrix(data = c(10, 20, 30, 40), ncol = 2)
matriz7
## [,1] [,2]
## [1,] 10 30
## [2,] 20 40

# Adding columns
matriz8 <- cbind(matriz7, c(100, 200))
matriz8
## [,1] [,2] [,3]
## [1,] 10 30 100
## [2,] 20 40 200

# Adding rows
rbind(matriz7, c(1000, 2000))
## [,1] [,2]
## [1,] 10 30
## [2,] 20 40
## [3,] 1000 2000

# Adding rows of a different type: the whole matrix becomes character
rbind(matriz7, c("A", "B"))
## [,1] [,2]
## [1,] "10" "30"
## [2,] "20" "40"
## [3,] "A" "B"

lista3 <- list(e1 = 1, e2 = "A", e3 = TRUE)
lista3
## $e1
## [1] 1
##
## $e2
## [1] "A"
##
## $e3
## [1] TRUE

# Adding a new element
lista3[["e4"]] <- NA
lista3
## $e1
## [1] 1
##
## $e2
## [1] "A"
##
## $e3
## [1] TRUE
##
## $e4
## [1] NA
names(lista3)
## [1] "e1" "e2" "e3" "e4"
length(lista3)
## [1] 4
library(readr)
# Importing delimited text files with readr ----
# Lines starting with `##` show console output. Every import below prints the
# same column specification, so it is reproduced only once.

# Comma-delimited file.
datos_comas <- read_csv(file = "encuesta_comas.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## promedio_academico = col_double(),
## color_favorito = col_character(),
## horas_estudiar = col_double(),
## horas_dormir = col_double(),
## redes_sociales = col_double(),
## redsocial_favorita = col_character(),
## bachiller_universidad = col_character(),
## lectura = col_character(),
## horas_internet = col_double(),
## trabajo = col_character()
## )

# Print the imported data. This statement and the next assignment were fused
# into a single identifier (`datos_comasdatos_pcomas`), so the data were never
# printed and `datos_pcomas` was never created.
datos_comas

# Semicolon-delimited file; read_csv2() also treats "," as the decimal mark.
datos_pcomas <- read_csv2(file = "encuesta_puntocomas.csv")
## i Using ',' as decimal and '.' as grouping mark. Use `read_delim()` for more control.

# Tab-delimited file.
datos_tabulacion <- read_tsv(file = "encuesta_espacios.csv")

# Any other delimiter is passed explicitly to read_delim().
datos_otro <- read_delim(file = "encuesta_delim.csv", delim = "/")
library(readxl)
# Importing an Excel workbook with readxl ----
# Lines starting with `##` show the console output of the preceding expression.

datos_excel <- read_excel(path = "encuesta_excel.xlsx")

# Print the imported data. This statement and the excel_sheets() call were
# fused into `datos_excelexcel_sheets(...)`, a function that does not exist.
datos_excel

# Names of the sheets in the workbook.
excel_sheets(path = "encuesta_excel.xlsx")
## [1] "Sheet1" "Hoja2 "

# Basic inspection of the imported data.
names(datos_excel) # column names
## [1] "promedio_academico" "color_favorito" "horas_estudiar"
## [4] "horas_dormir" "redes_sociales" "redsocial_favorita"
## [7] "bachiller_universidad" "lectura" "horas_internet"
## [10] "trabajo"
nrow(datos_excel) # number of rows
## [1] 30
ncol(datos_excel) # number of columns
## [1] 10
dim(datos_excel) # dimensions (rows and columns)
## [1] 30 10
class(datos_excel$promedio_academico) # class of one variable
## [1] "numeric"
library(dplyr)
# Inspecting and subsetting a data frame ----
# Lines starting with `##` show the console output of the preceding expression.
# The subsetting statements below were fused together on shared lines, which
# is not valid R; each one now stands on its own line.

# One line per column: name, type and first values.
glimpse(datos_excel)
## Rows: 30
## Columns: 10
## $ promedio_academico <dbl> 3.53, 3.80, 4.02, 4.39, 3.99, 3.80, 3.83, 3.80, ~
## $ color_favorito <chr> "Verde", "Amarillo", "Rojo", "Negro", "Azul", "V~
## $ horas_estudiar <dbl> 30, 12, 45, 60, 50, 25, 28, 40, 40, 36, 32, 35, ~
## $ horas_dormir <dbl> 7.0, 6.0, 6.0, 6.0, 8.0, 2.0, 2.0, 6.0, 6.0, 7.0~
## $ redes_sociales <dbl> 4, 9, 3, 2, 9, 5, 2, 4, 4, 5, 4, 3, 4, 4, 5, 5, ~
## $ redsocial_favorita <chr> "Youtube", "Facebook", "Facebook", "Facebook", "~
## $ bachiller_universidad <chr> "1 Año", "Menos de 1 año", "1 Año", "Menos de 1 ~
## $ lectura <chr> "Muy poco", "Frecuentemente", "Frecuentemente", ~
## $ horas_internet <dbl> 20, 3, 42, 90, 56, 40, 30, 80, 80, 10, 14, 80, 6~
## $ trabajo <chr> "Sí", "Sí", "No", "No", "No", "No", "No", "Sí", ~

# Rows by position: a range, then specific rows.
datos_excel[1:20, ]
datos_excel[c(1, 10, 20, 30), ]

# Rows by position and columns by name.
datos_excel[c(1, 10), c("color_favorito", "horas_dormir")]

# Rows by condition: academic average above the median ...
mediana <- median(datos_excel$promedio_academico)
datos_excel[datos_excel$promedio_academico > mediana, ]

# ... and, additionally, favourite colour equal to "Negro".
datos_excel[
  datos_excel$promedio_academico > mediana &
    datos_excel$color_favorito == "Negro",
]

# Building a data frame by hand from two vectors of equal length.
departamento <- c("Antioquia", "Santander", "Cundinamarca")
casos_covid <- c(1234, 2340, 4580)
mis_datos <- data.frame(depto = departamento, covid = casos_covid)
mis_datos