This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

# Build a numeric (double) vector running from -3 to 3; the notebook then
# coerces it to character and logical types.
numbers.vec <- as.numeric(-3:3)
numbers.vec
[1] -3 -2 -1  0  1  2  3
num2char <- as.character(numbers.vec)
num2char
[1] "-3" "-2" "-1" "0"  "1"  "2"  "3" 
num2logical <- as.logical(numbers.vec)
num2logical
[1]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
char.vec <- c("1","3","five","7")
char.vec
[1] "1"    "3"    "five" "7"   

Vector

Un vector es un arreglo de datos. En R, es el tipo básico de almacenamiento de datos. El vector puede ser numérico, de caracteres o lógico, según el tipo de sus elementos.

# The first five odd numbers, stored as a double vector.
vector1 <- 2 * seq_len(5) - 1
vector1
[1] 1 3 5 7 9
# accessing second elements of "vector1"
vector1[2]
[1] 3
# accessing three elements starting from second element
vector1[2:4]
[1] 3 5 7

Factor

Un factor es otro tipo de dato importante en R, especialmente cuando se trata de variables categóricas. En un vector R, no hay límite en el número de elementos distintos, pero en las variables factoriales, sólo se requiere un número limitado de elementos distintos.

# Build a factor by passing a single argument (the data vector) to factor();
# the levels are derived from the distinct values 1 to 9.
factor1 <- factor(as.numeric(1:9))
factor1
[1] 1 2 3 4 5 6 7 8 9
Levels: 1 2 3 4 5 6 7 8 9

Data frame

Un marco de datos es una disposición rectangular de filas y columnas de vectores y/o factores, como una hoja de cálculo en MS Excel.

# Four equal-length columns: two numeric vectors, one character vector
# and one factor.
var1 <- as.numeric(101:105)
var2 <- c(25, 22, 29, 34, 33)
var3 <- c("Non-Diabetic", "Diabetic")[c(1, 2, 1, 1, 2)]
var4 <- factor(rep(c("male", "female", "male"), times = c(2, 2, 1)))
# Combine the columns into a data frame; each column keeps the name of
# the vector it was built from.
diab.dat <- data.frame(var1 = var1, var2 = var2, var3 = var3, var4 = var4)
diab.dat

Determinar las clases de cada columna.

#class of each column before creating data frame
class(var1)
[1] "numeric"

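Para acceder a columnas individuales de un data frame use el operador ($), los dobles corchetes ([[ ]]) con el nombre de la columna, o el índice de columna ([, j]).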

diab.dat$var1
[1] 101 102 103 104 105
diab.dat[["var1"]]
[1] 101 102 103 104 105
diab.dat[,1]
[1] 101 102 103 104 105

Matrices

Arreglo bidimensional de variables del mismo tipo.

# data frame to matrix conversion
mat.diab <- as.matrix(diab.dat)
mat.diab
     var1  var2 var3           var4    
[1,] "101" "25" "Non-Diabetic" "male"  
[2,] "102" "22" "Diabetic"     "male"  
[3,] "103" "29" "Non-Diabetic" "female"
[4,] "104" "34" "Non-Diabetic" "female"
[5,] "105" "33" "Diabetic"     "male"  
class(mat.diab)
[1] "matrix"

Para poder realizar operaciones matemáticas con matrices, sus elementos deben ser valores numéricos.

# Fix the RNG seed so the same random matrix is produced on every run.
set.seed(12345)
# Draw nine standard-normal values and shape them into a 3 x 3 numeric
# matrix (filled column by column).
num.mat <- rnorm(9)
dim(num.mat) <- c(3, 3)
num.mat
           [,1]       [,2]       [,3]
[1,]  0.5855288 -0.4534972  0.6300986
[2,]  0.7094660  0.6058875 -0.2761841
[3,] -0.1093033 -1.8179560 -0.2841597
# matrix multiplication
t(num.mat) %*% num.mat
          [,1]       [,2]       [,3]
[1,] 0.8581332 0.36302951 0.20405722
[2,] 0.3630295 3.87772320 0.06350551
[3,] 0.2040572 0.06350551 0.55404860

Arrays

Un arreglo (array) es un objeto de datos con múltiples subíndices que permite almacenar marcos de datos, matrices o vectores de diferentes tipos. Los marcos de datos y las matrices tienen solo dos dimensiones, pero un arreglo puede tener cualquier número de dimensiones.

# To produce the same results over time we set a seed value
set.seed(12345)
# Build the 2 x 2 x 3 array in one vectorized call. array() fills in
# column-major order, so slice k receives the k-th group of four draws,
# exactly as three successive rnorm(4) slice assignments would.
mat.array <- array(rnorm(12), dim = c(2, 2, 3))
mat.array
, , 1

          [,1]       [,2]
[1,] 0.5855288 -0.1093033
[2,] 0.7094660 -0.4534972

, , 2

           [,1]       [,2]
[1,]  0.6058875  0.6300986
[2,] -1.8179560 -0.2761841

, , 3

           [,1]       [,2]
[1,] -0.2841597 -0.1162478
[2,] -0.9193220  1.8173120

list

Es un objeto que puede almacenar otros objetos de cualquier tipo.

# Rebuild the objects that will be stored in the list: two numeric vectors,
# a character vector, a factor, a data frame and a three-dimensional array.
var1 <- c(101, 102, 103, 104, 105)
var2 <- c(25, 22, 29, 34, 33)
var3 <- c(
  "Non-Diabetic", "Diabetic", "Non-Diabetic", "Non-Diabetic", "Diabetic"
)
var4 <- factor(c("male", "male", "female", "female", "male"))
diab.dat <- data.frame(var1, var2, var3, var4)
# Seed the RNG so the array contents are reproducible, then fill the
# 2 x 2 x 3 array with twelve standard-normal draws in a single call
# (same values as filling the three slices with rnorm(4) one after another).
set.seed(12345)
mat.array <- array(rnorm(12), dim = c(2, 2, 3))
# creating list: a list can hold objects of different types, each under
# its own name
obj.list <- list(
  elem1 = var1,
  elem2 = var2,
  elem3 = var3,
  elem4 = var4,
  elem5 = diab.dat,
  elem6 = mat.array
)
obj.list
$elem1
[1] 101 102 103 104 105

$elem2
[1] 25 22 29 34 33

$elem3
[1] "Non-Diabetic" "Diabetic"     "Non-Diabetic" "Non-Diabetic"
[5] "Diabetic"    

$elem4
[1] male   male   female female male  
Levels: female male

$elem5

$elem6
, , 1

          [,1]       [,2]
[1,] 0.5855288 -0.1093033
[2,] 0.7094660 -0.4534972

, , 2

           [,1]       [,2]
[1,]  0.6058875  0.6300986
[2,] -1.8179560 -0.2761841

, , 3

           [,1]       [,2]
[1,] -0.2841597 -0.1162478
[2,] -0.9193220  1.8173120

Acceso a elementos de la lista.

obj.list[[1]]
[1] 101 102 103 104 105

Missing values

# Small data frame with one missing value in each column.
missing_dat <- data.frame(
  v1 = c(1, NA, 0, 1),
  v2 = c("M", "F", NA, "M")
)
missing_dat
# Flag the missing entries of column v1.
is.na(missing_dat[["v1"]])
[1] FALSE  TRUE FALSE FALSE
is.na(missing_dat$v2)
[1] FALSE FALSE  TRUE FALSE
any(is.na(missing_dat))
[1] TRUE

Basic Data Manipulation

Acquiring data

Formatos comunes: Comma separated values (.csv), Text file with Tab delimited, MS Excel file (.xls or .xlsx) , R data object (.RData)

# Path of the CSV file, resolved against the current working directory.
# Edit `csv_path` to point at the file instead of changing the working
# directory with setwd().
csv_path <- "datos.csv"
# Fail early with a clear message when the file is not where we expect it,
# rather than with the low-level "cannot open the connection" error.
if (!file.exists(csv_path)) {
  stop(
    "Cannot find '", csv_path, "' in directory '", getwd(), "'.",
    call. = FALSE
  )
}
# skip = 2 makes read.csv() ignore the first two lines of the file before
# it starts reading.
anscombe <- read.csv(csv_path, skip = 2)
no fue posible abrir el archivo 'datos.csv': No such file or directoryError in file(file, "rt") : no se puede abrir la conexión

Importar archivo csv separado por punto y coma (“;”)

Importar archivo csv separado por tabulador (“\t”)

Excel file

Calling xlsx library

# Load the xlsx package, which provides read.xlsx2().
library(xlsx)
# importing datos.xlsx: sheetIndex = 1 selects the first worksheet
ans_xlsx <- read.xlsx2("datos.xlsx", sheetIndex = 1)

Rdata

loading robjects.RData file

# Restore the objects saved in robjects.RData into the current environment.
load("robjects.RData")

Date processing

# Second January 1970 is showing number of elapsed day is 1.
as.Date("1970-01-02")
[1] "1970-01-02"
as.numeric(as.Date("1970-01-02"))
[1] 1
# creating date object specifying format of date
# "%b" is matched against the abbreviated month names of the current
# LC_TIME locale, so "Jan" yields NA when the session locale is not
# English. Parse under the "C" locale and restore the previous setting
# afterwards.
old_lc_time <- Sys.getlocale("LC_TIME")
invisible(Sys.setlocale("LC_TIME", "C"))
jan_first <- as.Date("Jan-01-1970", format = "%b-%d-%Y")
invisible(Sys.setlocale("LC_TIME", old_lc_time))
jan_first
[1] NA
# loading lubridate package
library(lubridate)

Attaching package: ‘lubridate’

The following object is masked from ‘package:base’:

    date
# creating date object using mdy() function
mdy("Jan-01-1970")
[1] "1970-01-01"
# Parse a day-month-year string into a UTC date-time using base R only.
date <- as.POSIXct(
  "23-07-2013",
  format = "%d-%m-%Y",
  tz = "UTC"
)
# Format the month as a two-digit string, then convert it to a number.
as.numeric(format(date, format = "%m"))
[1] 7
# accessing system date and time
   current_time <- now()
   current_time
[1] "2019-09-07 03:14:42 -03"
# changing time zone to "GMT"
   current_time_gmt <- with_tz(current_time,"GMT")
   current_time_gmt
[1] "2019-09-07 06:14:42 GMT"
# rounding the date to nearest day
   round_date(current_time_gmt,"day")
[1] "2019-09-07 GMT"

Character manipulation

Uso de la librería “stringr”:

- str_c(): This function is used to concatenate a vector of characters; the default separator is an empty string (use the `sep` argument to insert a space or any other separator).
- str_length(): This returns the number of characters in a character string.
- str_dup(): This is used to duplicate the characters within a string.
- str_trim(): This is used to remove the leading and trailing whitespaces.
- str_pad(): This is used to pad a string with extra whitespaces on the left, right, or both sides.

library(stringr)

Subconjuntos

# Ten-element numeric vector used in the subsetting examples.
num10 <- c(3, 2, 5, 3, 9, 6, 7, 9, 2, 3)
# Positional indexing: the fifth element.
num10[5]
# Element-wise comparison: TRUE wherever the value is greater than 6.
num10 > 6
# Logical indexing: keep only the values greater than 6.
num10[num10 > 6]
# A negative subscript drops that position; here the first element (3).
num10[-1]

para objetos bidimensionales

# Two-column data frame used to demonstrate row and column indexing.
data_2variable <- data.frame(
  x1 = c(2, 3, 4, 5, 6),
  x2 = c(5, 6, 7, 8, 1)
)
# First row, all columns.
data_2variable[1, ]
# First column, all rows (returned as a plain vector).
data_2variable[, 1]
[1] 2 3 4 5 6
# accessing first row and first column
   data_2variable[1,1]
[1] 2

Selección de elementos en listas

# Named list holding the two-column data frame and a numeric vector.
list_obj <- list(
  dat = data_2variable,
  vec.obj = c(1, 2, 3)
)
list_obj
$dat

$vec.obj
[1] 1 2 3
$vec.obj
# Extract the second component of list_obj by position.
list_obj[[2]]
# Extract the data frame component of list_obj by name.
list_obj[["dat"]]
