[1] 3
[1] 2
[1] 3.333333
[1] 3.3
[1] 3.333333
[1] 12
[1] 1000
[1] 3.162278
[1] 3.162278
[1] 6.909753
[1] 22026.47
[1] 3+2i
[1] 5
[1] 2.302585
[1] TRUE
[1] FALSE
[1] FALSE
# Objetos
# objetos, el nombre que se le da a algun dato o grupos de datos
# ls() es para ver que objetos estan en el environment
# tipos de objeto, el vector atomico con c()
escalar=3
vector=c(2,2,1,2,2)# vector columna
# c concatenate,
length(vector)
[1] 5
[1] TRUE
[1] 6 6 3 6 6
[1] 1.5 1.5 3.0 1.5 1.5
[1] 5 5 4 5 5
[1] 5
[1] 4 4 2 0 4
[1] 0 0 0 4 0
[,1]
[1,] 9
     [,1] [,2] [,3] [,4] [,5]
[1,]    4    4    2   -4    4
[2,]    4    4    2   -4    4
[3,]    2    2    1   -2    2
[4,]    4    4    2   -4    4
[5,]    4    4    2   -4    4
[1] 51.3
[1] 51.41119
[1] 28.23699
[1] 797.3276
[1] 513409.1
[1] Inf
[1] 99.99503
[1] 2.009986
[1] 97.98505
[1] 2.009986 2.012019 2.046270 2.060663 2.066524 2.072463
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
   2.01   26.90   51.41   51.34   76.09  100.00
[1] “raul”
[1] “raul” “kevin”
[1] 49.02189
[1] 49.93806
[1] NA
- Instalación de librerías
- Limpiar el environment
- Objetos
- Diferentes tipos de objetos
[1] 3
[1] TRUE
[1] 1.666667
[1] 1 NA 2
####
# Overwrite selected positions of vector1 with missing values.
vector1[c(1, 3)] = NA
vector1[1:2] = NA
# Build a longer vector: 10 draws from a uniform distribution on [5, 10].
vector2 = runif(n = 10, min = 5, max = 10)
# Flag positions 2, 3 and 5 as missing.
vector2[c(2, 3, 5)] = NA
# Drop the missing entries, keeping only the observed values.
vector3 = vector2[!is.na(vector2)]
mean(vector3)
[1] 7.793561
# Vector categórico
# tiene que ir entre comillas
nombres=c('rojo','blanco','negro')
nombres1=sample(c('sol','aguila'),100,replace = TRUE, prob=c(.2,.8))
table(nombres1)
nombres1
aguila    sol
    83     17
nombres1 aguila sol 83 17
vector_log FALSE TRUE 2 1
# generar un vector numérico
# 1, 2, 3, 4
# 1= norte, 2=sur, 3=este....
# vector numerico
nombres_fac=sample(c(1,2),10,replace = TRUE)
nombres_fac[1] 2 1 2 2 1 2 1 2 1 1
[1] “numeric”
[1] 2 1 2 2 1 2 1 2 1 1 attr(,“levels”) [1] “rico” “pobre”
# Vectores especiales
# de tiempo
tiempo=1900:2020
# replicar numeros
# el numero y cuantas veces
rep(1,10)[1] 1 1 1 1 1 1 1 1 1 1
[1] 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8
[1] 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1
[1] 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2
[1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 5 5 5 5 5
[1] 1 1 1 1 1 1 1 1 8 8 8 8 8 8 8 8 8 8 8 8
# secuencia
# secuencia tiene, donde empieza, el tamano y cada cuanto
# secuencia de 1, del tamano de a1, cada .5
a1=1:15
# seq(10,tamaño,cada cuanto)
seq(5,10,.1)[1] 5.0 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9 6.0 6.1 6.2 6.3 6.4 [16] 6.5 6.6 6.7 6.8 6.9 7.0 7.1 7.2 7.3 7.4 7.5 7.6 7.7 7.8 7.9 [31] 8.0 8.1 8.2 8.3 8.4 8.5 8.6 8.7 8.8 8.9 9.0 9.1 9.2 9.3 9.4 [46] 9.5 9.6 9.7 9.8 9.9 10.0
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
[1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 [16] 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 12.5 13.0 13.5 14.0 14.5 15.0
[1] 2 3 4 5 6 7 8 9 10
[1] 5 6 7 8 9 10
[1] “2019-01-01” “2019-01-02” “2019-01-03” “2019-01-04” “2019-01-05” [6] “2019-01-06” “2019-01-07” “2019-01-08” “2019-01-09” “2019-01-10” [11] “2019-01-11” “2019-01-12” “2019-01-13” “2019-01-14” “2019-01-15” [16] “2019-01-16” “2019-01-17” “2019-01-18” “2019-01-19” “2019-01-20” [21] “2019-01-21” “2019-01-22” “2019-01-23” “2019-01-24” “2019-01-25” [26] “2019-01-26” “2019-01-27” “2019-01-28” “2019-01-29” “2019-01-30” [31] “2019-01-31”
# realizando una gráfica
# Simulate a random walk: cumulative sum of 31 standard-normal draws.
var1 = cumsum(rnorm(n = 31))
# One integer per element, 190101 through 190131.
fecha = 190101:190131
# Convert the integers with ymd(); ymd() is not defined in the visible source
# (presumably lubridate, reading them as year-month-day -- confirm).
fecha1 = ymd(fecha)
# Line plot with fecha1 on the horizontal axis and var1 on the vertical axis.
# The colour is given as an HTML hex code (#115C63).
plot(
  x = fecha1, y = var1,
  type = "l", lwd = 3, lty = 2,
  col = "#115C63",
  xlab = "Tiempo", ylab = "", main = "",
  cex.axis = .6
)
# Vectors and matrices
# Three numeric vectors of length 5; each one becomes a column of mat1.
a1 = c(1, 2, 3, 1, 2)
a2 = c(1, 2, 4, 1, 2)
a3 = c(1, 2, 1, 1, 1)
# Two shorter vectors of length 2.
b1 = c(1, 2)
b2 = c(1, 1)
# Bind the three vectors side by side: the result is a 5 x 3 matrix whose
# column names are a1, a2 and a3.
mat1 = cbind(a1, a2, a3)
# uniendo por filas
rbind(a1,a2,a3)
   [,1] [,2] [,3] [,4] [,5]
a1    1    2    3    1    2
a2    1    2    4    1    2
a3    1    2    1    1    1
[,1] [,2] [,3] [,4] [,5] a1 1 2 3 1 2 a2 1 2 4 1 2 a3 1 2 1 1 1
[1] 3 5
a b c d e
azul 1 2 3 1 2 rojo 1 2 4 1 2 verde 1 2 1 1 1
a b c d e
azul 0.04 0.08 0.12 0.04 0.08 rojo 0.04 0.08 0.16 0.04 0.08 verde 0.04 0.08 0.04 0.04 0.04
a b c d e
azul 0.1111111 0.2222222 0.3333333 0.1111111 0.2222222 rojo 0.1000000 0.2000000 0.4000000 0.1000000 0.2000000 verde 0.1666667 0.3333333 0.1666667 0.1666667 0.1666667
a b c d e
azul 0.3333333 0.3333333 0.375 0.3333333 0.4 rojo 0.3333333 0.3333333 0.500 0.3333333 0.4 verde 0.3333333 0.3333333 0.125 0.3333333 0.2
# Vectores y matrices
# los vectores se pueden unir para formar una matriz
# las matrices deben ser numericas
# Three numeric vectors of length 5.
a1 = c(1, 2, 3, 1, 2)
a2 = c(1, 2, 4, 1, 2)
a3 = c(1, 2, 1, 1, 1)
# A first pair of length-2 vectors ...
b1 = c(1, 2)
b2 = c(1, 1)
# ... immediately replaced by a second pair; only these values survive.
b1 = c(2, 2)
b2 = c(3, 2)
# poner nombre
matriz a b c d e
azul 1 2 3 1 2 rojo 1 2 4 1 2 verde 1 2 1 1 1
function (x, as.factor = FALSE) { if (as.factor) { labs <- colnames(x, do.NULL = FALSE, prefix = "") res <- factor(.Internal(col(dim(x))), labels = labs) dim(res) <- dim(x) res } else .Internal(col(dim(x))) } <bytecode: 0x7f8274c91dd0> <environment: namespace:base>
b1 b2
[1,] 2 3 [2,] 2 2
[,1] [,2] b1 2 2 b2 3 2
[,1] [,2] [,3] [,4] [,5] a1 1 2 3 1 2 a2 1 2 4 1 2 a3 1 2 1 1 1
[,1] [,2] [,3] [,4] [,5] a1 1 2 3 1 2 a2 1 2 4 1 2 a3 1 2 1 1 1
[1] 3 5
a b c d e
azul 1 2 3 1 2 rojo 1 2 4 1 2 verde 1 2 1 1 1
a b c d e
azul 0.04 0.08 0.12 0.04 0.08 rojo 0.04 0.08 0.16 0.04 0.08 verde 0.04 0.08 0.04 0.04 0.04
a b c d e
azul 0.1111111 0.2222222 0.3333333 0.1111111 0.2222222 rojo 0.1000000 0.2000000 0.4000000 0.1000000 0.2000000 verde 0.1666667 0.3333333 0.1666667 0.1666667 0.1666667
a b c d e
azul 0.3333333 0.3333333 0.375 0.3333333 0.4 rojo 0.3333333 0.3333333 0.500 0.3333333 0.4 verde 0.3333333 0.3333333 0.125 0.3333333 0.2
[,1] [,2] [,3]
[1,] 9 9 9
[,1] [,2] [,3]
[1,] 1 4 7 [2,] 2 5 8 [3,] 3 6 9
[,1] [,2] [,3]
[1,] 1.2 1.2 2.2 [2,] 2.8 2.2 3.5 [3,] 1.2 2.6 2.3
# no forma parte del curso
# creacion de numeros aleatorios, sujetos al azar
# generar aleatorios, med
hist(rnorm(100,5,1))[1] 3.171381 3.143956 4.167558 4.779001 4.663749 2.615840 1.475311 3.160375 [9] 1.247607 1.610476
[,1] [,2] [,3]
[1,] 2 4 3 [2,] 3 4 4 [3,] 2 1 3
[,1] [,2] [,3]
[1,] -2.6666667 3.000000e+00 -1.3333333 [2,] 0.3333333 -6.661338e-17 -0.3333333 [3,] 1.6666667 -2.000000e+00 1.3333333
[1] -3
[1] 9 11 6
[1] 7 9 10
[1] 3.000000 3.666667 2.000000
[1] 2.333333 3.000000 3.333333
# multiplicación por un vector y resolución de ecuaciones
r=c(1,1,1)
# multiplicacion por un vector
# el número de columnas del primer elemento debe ser igual al número de filas del segundo
matriz1%*%r
     [,1]
[1,]    9
[2,]   11
[3,]    6
[1] -1 0 1
# subconjuntos en matrices
# se pueden formar subconjuntos e imputar NAs
datos=matrix(round(runif(25)),ncol = 5)
datos [,1] [,2] [,3] [,4] [,5]
[1,] 0 0 1 1 0 [2,] 1 1 1 1 1 [3,] 0 0 1 1 1 [4,] 1 0 1 1 1 [5,] 1 0 1 0 1
[1] 0 1 1 1 1
[1] 1 1 1 1 1
[,1] [,2]
[1,] 0 0 [2,] 1 1 [3,] 0 1 [4,] 1 1 [5,] 1 1
[,1] [,2] [,3]
[1,] 0 1 1 [2,] 1 1 1 [3,] 0 1 1 [4,] 0 1 1 [5,] 0 1 0
[1] 1 1 1
# bases de datos
# Small example data frame: one numeric column and two text columns.
datos = data.frame(
  var1 = c(1, 2),
  var2 = c("azul", "rojo"),
  var3 = c("si", "no")
)
# Each column is a variable. Rename the columns, then show how to switch the
# names to upper case and back to lower case (final names: v1, v2, v3).
names(datos) = c("v1", "v2", "v3")
names(datos) = toupper(names(datos))
names(datos) = tolower(names(datos))
# imprimir base de datos
datos
  v1   v2 v3
1  1 azul si
2  2 rojo no
v1 v2 v3
Min. :1.00 Length:2 Length:2
1st Qu.:1.25 Class :character Class :character
Median :1.50 Mode :character Mode :character
Mean :1.50
3rd Qu.:1.75
Max. :2.00
[1] 2 3
‘data.frame’: 2 obs. of 3 variables: $ v1: num 1 2 $ v2: chr “azul” “rojo” $ v3: chr “si” “no”
[1] 1 2
[1] 1.5
[1] 1.5
azul rojo 1 1
no si
azul 0 1 rojo 1 0
detach(datos)
# hay diferentes tipos de base de datos
# tipo extendido y de panel
# listas
# una lista es más general que un vector:
# un vector no puede combinar tipos de datos distintos
vector1=c(1,"si",'azul')
vector2=list(x=1, y='si',z='azul')
# extracción de listas
vector2$x[1]
[1] 1
[1] NA
# la lista puede guardar diversas bases de datos o combinar un escalar, un vector y una base de datos
a=1
x=runif(100,20,35)
datosv1 v2 v3 1 1 azul si 2 2 rojo no
v2
1 azul 2 rojo
var1 var2 1 1 si 2 2 no
‘data.frame’: 2 obs. of 2 variables: $ var1: num 1 2 $ var2: chr “si” “no”
## cambiar de nombre a las columnas
colnames(datos)=c('a1','a2')
# para pasar el nombre de las variables a mayusculas
colnames(datos)=toupper(names(datos))
datosA1 A2 1 1 si 2 2 no
| a1 | a2 |
|---|---|
| 1 | si |
| 2 | no |
# Listas
datos=data.frame(var1=c(1,2),
var2=c('si','no')
)
# defino un escalar
a1=1
# defino un vector
a2=c('rojo','azul')
# tengo una base de datos
datosvar1 var2 1 1 si 2 2 no
[[1]] [1] 1
[[2]] [1] “rojo” “azul”
[[3]] var1 var2 1 1 si 2 2 no
var1 var2 1 1 si 2 2 no
var1 var2 1 1 si 2 2 no
# Bases de datos
# Simulated data frame with 10 rows:
#   a1: uniform draws between 60 and 80
#   a2: uniform draws between 1.50 and 1.80
#   a3: labels sampled with replacement from "M" and "H"
# The columns are generated in this order, so the random-number stream is
# consumed exactly as before.
datos_1 = data.frame(
  a1 = runif(n = 10, min = 60, max = 80),
  a2 = runif(n = 10, min = 1.50, max = 1.80),
  a3 = sample(x = c("M", "H"), size = 10, replace = TRUE)
)
# subconjuntos, estadisticas y cuadros basicos
datos_1 a1 a2 a3
1 70.42271 1.752819 M 2 73.19677 1.569349 M 3 76.43611 1.571730 M 4 75.72563 1.523007 M 5 79.59644 1.573717 M 6 68.78863 1.719641 M 7 66.23404 1.754236 M 8 68.18950 1.649258 H 9 60.20934 1.616373 H 10 63.67699 1.573935 H
‘data.frame’: 10 obs. of 3 variables: $ a1: num 70.4 73.2 76.4 75.7 79.6 … $ a2: num 1.75 1.57 1.57 1.52 1.57 … $ a3: chr “M” “M” “M” “M” …
[1] 10 3
# se pueden formar subconjuntos
# nombre de la base de datos mas corchetes
# si quiero obtener la tercera variable
datos_1[,3][1] “M” “M” “M” “M” “M” “M” “M” “H” “H” “H”
a1 a2 a3
1 70.42271 1.752819 M 2 73.19677 1.569349 M
[1] 68.1895
a1 a2 a3
Min. :60.21 Min. :1.523 Length:10
1st Qu.:66.72 1st Qu.:1.572 Class :character
Median :69.61 Median :1.595 Mode :character
Mean :70.25 Mean :1.630
3rd Qu.:75.09 3rd Qu.:1.702
Max. :79.60 Max. :1.754
a1 a2 a3
1 70.42271 1.752819 M 2 73.19677 1.569349 M 3 76.43611 1.571730 M 4 75.72563 1.523007 M 5 79.59644 1.573717 M 6 68.78863 1.719641 M 7 66.23404 1.754236 M 8 NA 1.649258 H 9 60.20934 1.616373 H 10 63.67699 1.573935 H
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
a1 a2 a3
1 70.42271 1.752819 M 2 73.19677 1.569349 M 3 76.43611 1.571730 M 4 75.72563 1.523007 M 5 79.59644 1.573717 M 6 68.78863 1.719641 M 7 66.23404 1.754236 M 9 60.20934 1.616373 H 10 63.67699 1.573935 H
# graficas
# una variable numérica
# diagrama de dispersión
plot(datos_1$a2,datos_1$a1,col='#0C8488', cex.axis=.8,pch=15,
main='Primera gráfica',ylab='Pesos',xlab='Estaturas')
 ...1 country continent year
Min. : 1 Length:101 Length:101 Min. :2002
1st Qu.: 26 Class :character Class :character 1st Qu.:2002
Median : 51 Mode :character Mode :character Median :2002
Mean : 51 Mean :2002
3rd Qu.: 76 3rd Qu.:2002
Max. :101 Max. :2002
lifeexp pop gdppercap grupos_ing
Min. :39.19 Min. :5.147e+06 Min. : 241.2 Length:101
1st Qu.:56.37 1st Qu.:9.771e+06 1st Qu.: 1287.5 Class :character
Median :70.75 Median :1.847e+07 Median : 4563.8 Mode :character
Mean :65.72 Mean :5.728e+07 Mean : 9561.9
3rd Qu.:74.90 3rd Qu.:4.101e+07 3rd Qu.:13638.8
Max. :82.00 Max. :1.280e+09 Max. :39097.1
# 2 x 2 panel of base-graphics plots built from gm_2002 (created elsewhere).
# `mar` is the graphical parameter for the margins; the original `margin=` is
# not a par() name, so it only raised a warning and left the margins untouched.
# Bottom and left margins are 4 lines so the axis titles set below stay visible.
par(mar = c(4, 4, 2, 2), mfrow = c(2, 2))
# Panel 1: histogram of gdppercap with a dashed vertical line at its mean.
hist(gm_2002$gdppercap, col = "#117A7E",
     xlab = "Ingreso", main = "Gráfica 1")
abline(v = mean(gm_2002$gdppercap), lty = 2)
# Panel 2: scatter plot of lifeexp against gdppercap.
plot(gm_2002$gdppercap, gm_2002$lifeexp, col = "#117A7E",
     ylab = "Esperanza de vida", xlab = "Ingreso")
# Panel 3: numeric variable by categorical variable.
# With horizontal = TRUE the numeric values run along the x axis, so the
# income title belongs on xlab and the category title on ylab.
boxplot(gm_2002$gdppercap ~ gm_2002$grupos_ing,
        col = "#117A7E", horizontal = TRUE,
        xlab = "Ingreso", ylab = "Categorias")
# Panel 4: two categorical variables, as horizontal bars of their
# cross-tabulation, with the legend in the bottom-right corner.
barplot(xtabs(~ gm_2002$continent + gm_2002$grupos_ing),
        legend.text = TRUE, horiz = TRUE,
        args.legend = list(x = "bottomright"),
        col = c("#4BE529", "#2CD0B0", "#75DE7D", "#1E3920", "#A2F2FF"),
        cex.axis = .5, cex = .5)
# Time series
# Fetch the closing-price series for ticker "cl=f" from 2001-01-10 onwards.
# pdfetch_YAHOO() is not defined in the visible source (presumably the pdfetch
# package -- confirm it is loaded before this chunk).
petroleo = pdfetch_YAHOO("cl=f",
                         fields = "close", from = "2001-01-10")
colnames(petroleo) = "petroleo"
# Two plots side by side: the full series and the part from 2010 onwards.
par(mfrow = c(1, 2))
plot(petroleo, col = "#784491", main = "")
# `start` is spelled out; the original `st=` relied on partial argument
# matching.
plot(window(petroleo, start = "2010-01-01"), col = "#FA167B", main = "")
# This call was fused onto the end of the previous plot() line, which is a
# syntax error; it needs its own line.
library(tidyverse)
# funciones básicas
# funciones
# tienen nombre, argumentos y el cuerpo
# las funciones más simples se encadenan
x=runif(20,10,90)
# Natural logarithm of every element of x, written as a one-step pipe.
#   x: numeric vector.
# Returns a numeric vector of the same length as x.
calcular = function(x) {
  return(x %>% log())
}
# calculando la media
# Mean of the natural logarithms of x, written as a two-step pipe.
#   x: numeric vector.
# Returns a single number.
calcular = function(x) {
  return(x %>% log() %>% mean())
}
# Mean of the natural logarithms of x, multiplied by 100.
#   x: numeric vector.
# Returns a single number. The pipe binds tighter than `*`, so the whole
# piped mean is computed first and only then scaled.
calcular = function(x) {
  media_log = x %>% log() %>% mean()
  media_log * 100
}
calcular(x=c(10,100,1000))
[1] 460.517
# indice de extremos
y=runif(100,2,90)
# Relative range of x: (largest value - smallest value) divided by the mean.
#   x: numeric vector.
# Returns a single number.
rango = function(x) {
  diff(range(x)) / mean(x)
}
rango(y)
[1] 1.90648
# aplicando un condicional
# dos vectores, uno con la varianza y otro con la desviación estándar
# Pick a greeting according to a single value.
#   x: one value compared against 5.
# Returns 'Hola Mexico' when x is below 5 and 'Hola mundo' otherwise.
probar = function(x) {
  if (x < 5) "Hola Mexico" else "Hola mundo"
}
probar(10)
[1] "Hola mundo"
[1] 9
# calculando la media
# Arithmetic mean computed by hand: sum of x divided by its length.
#   x: numeric vector.
#   n: accepted for compatibility but ignored; the length of x is always used.
# Returns a single number; any NA in x makes the result NA.
media = function(x, n) {
  sum(x) / length(x)
}
media(x=rnorm(20))
[1] 0.1174799
[1] 3.571429
[1] NA
# para calcular la media se necesitan remover los NAs
# Arithmetic mean of the non-missing values of x, computed by hand.
#   x: numeric vector, possibly containing NA.
#   n: accepted for compatibility but ignored; the count of non-missing
#      values is always used.
# Returns a single number (NaN when x has no non-missing values).
media = function(x, n) {
  observados = x[!is.na(x)]
  sum(observados) / length(observados)
}
media(var1)
[1] 4
# la tasa de crecimiento
# Period-over-period growth rate of x, in percent.
#   x: numeric vector ordered in time.
# Returns a numeric vector of the same length as x. The first element is NA
# (there is no previous value) and element i is
# (x[i] - x[i - 1]) / x[i - 1] * 100. An empty x gives an empty result.
#
# The previous version divided diff(x) (length n - 1) by lag(x) (length n),
# so the shorter vector was recycled and every difference was paired with the
# wrong base value. The result also depended on which lag() was attached.
# Dividing by x[-n] keeps both vectors at length n - 1 and correctly aligned.
tasa_de_crecimiento = function(x) {
  n = length(x)
  if (n == 0L) {
    return(numeric(0))
  }
  c(NA_real_, diff(x) / x[-n] * 100)
}
var2=runif(20,20,90)
tasa_de_crecimiento(var2)
 [1]         NA  50.004128  16.828141 -50.784328  15.922555  34.135386
[7] -7.183964 0.250289 36.015844 -60.573960 -23.372753 58.051981
[13] -74.288360 32.194686 -22.748586 79.408716 -90.715762 -12.534767
[19] 58.461896 -64.267221
50%
55.65087
##############
# dos vectores
a1=rnorm(10)
a2=rnorm(10)
#######################
# Compute one of two results depending on y and return both slots as a list.
#   x: numeric value (or vector).
#   y: single number that selects the branch.
# Returns an unnamed list of length 2:
#   [[1]] log(x) when y >= 2, otherwise NULL
#   [[2]] x + y  when y <  2, otherwise NULL
#
# Both slots are initialised locally. Previously only one of a1/a2 was
# assigned, so the other was silently read from the global environment (or the
# call failed when no such global existed).
sumar = function(x, y) {
  a1 = NULL
  a2 = NULL
  if (y >= 2) {
    a1 = log(x)
  } else {
    a2 = x + y
  }
  list(a1, a2)
}
sumar(3,4)
[[1]]
[1] 1.098612
[[2]]
[1] -0.19717589 1.10992029 0.08473729 0.75405379 -0.49929202 0.21444531
[7] -0.32468591 0.09458353 -0.89536336 -1.31080153