Asier Goikoetxea eta Gorka Kobeaga
2016/10/27
Presentación basada en el libro YaRrr! The Pirate's Guide to R
CRAN y Rstudio.com
Programar es un proceso artesanal:
# ******* Install the YaRrr! package (one-time setup, kept commented out) *******
# install.packages("devtools"); library(devtools)
# install_github("ndphillips/yarrr", build_vignettes = TRUE)
library(yarrr)
# Read the word table from a CSV file located in the current working directory
# and show its first rows.
palabras <- read.csv("unigram_ws.csv")
head(palabras)
X Content Frequency
1 1 the 333505
2 2 to 191753
3 3 and 168449
4 4 a 166493
5 5 of 140199
6 6 i 115619
# Number of rows in the pirates data set.
nrow(pirates)
[1] 1000
# Column names of the pirates data set.
names(pirates)
[1] "id" "sex" "age"
[4] "height" "weight" "headband"
[7] "college" "tattoos" "tchests"
[10] "parrots" "favorite.pirate" "sword.type"
[13] "eyepatch" "sword.time" "beard.length"
[16] "fav.pixar" "grogg"
Head / Tail
# First rows of the pirates data set, restricted to its first six columns.
head(pirates[, 1:6])
id sex age height weight headband
1 1 male 28 173.11 70.5 yes
2 2 male 31 209.25 105.6 yes
3 3 male 26 169.95 77.1 yes
4 4 female 31 144.29 58.5 no
5 5 female 41 157.85 58.4 yes
6 6 male 26 190.20 85.4 yes
Filtrar por columnas: [ , X]. Filtrar por filas: [X, ]. Filtrar por condiciones lógicas: subset(). Asignar un valor a una variable: '<-' (atajo de teclado: Alt + -).
# Keep rows 5 to 15 and the first three columns of the pirates data set.
muestra <- pirates[5:15, 1:3]
# Show only the rows of the sample whose sex is "male".
subset(muestra, sex == "male")
id sex age
6 6 male 26
10 10 male 30
12 12 male 20
14 14 male 26
Comparar dos variables u objetos:
12==2 # equality test; the opposite check is written 12!=2
[1] FALSE
# Greater-than-or-equal comparison.
12>=2
[1] TRUE
# Strictly-less-than comparison.
12<2
[1] FALSE
# Working days of the week, used below to check whether today is one of them.
laborable <- c(
  "lunes", "martes", "miercoles",
  "jueves", "viernes"
)
hoy <- "jueves"
# %in% tests whether the left-hand value appears in the right-hand vector.
hoy %in% laborable
[1] TRUE
# Summary statistics (minimum, quartiles, median, mean, maximum) of the age column.
summary(pirates$age)
Min. 1st Qu. Median Mean 3rd Qu. Max.
11.00 24.00 27.00 27.36 31.00 46.00
# Number of rows in each category of the sex column.
table(pirates$sex)
female male other
464 490 46
# Cross-tabulation of sex (rows) against sword type (columns).
table(pirates$sex, pirates$sword.type)
banana cutlass sabre scimitar
female 21 379 31 33
male 22 414 32 22
other 3 37 4 2
mean, median, range, var, sd, IQR, max, min
# Mean age within each sex group.
aggregate(age ~ sex, data = pirates, FUN = mean)
sex age
1 female 29.92241
2 male 24.96735
3 other 27.00000
# Mean age for every combination of sex and sword type.
aggregate(age ~ sex + sword.type, data = pirates, FUN = mean)
sex sword.type age
1 female banana 31.14286
2 male banana 24.54545
3 other banana 27.00000
4 female cutlass 29.91821
5 male cutlass 25.11353
6 other cutlass 26.91892
7 female sabre 28.35484
8 male sabre 24.21875
9 other sabre 28.50000
10 female scimitar 30.66667
11 male scimitar 23.72727
12 other scimitar 25.50000
# Histogram of the tattoos column, with sky-blue bars and white bar borders.
# Title and axis labels are user-facing Spanish text.
hist(
  x = pirates$tattoos,
  main = "Frecuencia del número de Tatuajes de los Piratas",
  xlab = "Número de Tatuajes",
  ylab = "Frecuencia",
  col = "skyblue",
  border = "white"
)
# Box plot of age for each sword type, one fill colour per box.
boxplot(
  age ~ sword.type,
  data = pirates,
  col = c("red", "blue", "green", "orange")
)

# Scatter plot of height against weight, drawn with filled, semi-transparent
# black points (gray level 0, alpha 0.4).
plot(
  x = pirates$weight,
  y = pirates$height,
  main = "Relación entre Peso y la Altura de los Piratas",
  xlab = "Peso (kg)",
  ylab = "Altura (cm)",
  pch = 16,
  col = gray(0, 0.4)
)

# Fit height as a linear function of weight and overlay the fitted line.
linea <- lm(height ~ weight, data = pirates)
abline(linea, col = "blue", lwd = 3)
library(circlize)
# Chord diagram of a 2 x 5 matrix filled with a random permutation of 1..10.
# No seed is set here, so the drawn values can differ from run to run.
chordDiagram(matrix(sample(10), nrow = 2,ncol = 5))
# Create a pirateplot showing the distribution of ages for each sword type
pirateplot(formula = age ~ sword.type,data = pirates, main = "Pirateplot of ages by favorite sword")
# Running total of worked hours: every working day adds one 8-hour shift.
trabajo <- 0
for (dia in laborable) {
  trabajo <- trabajo + 8
  # Build and print the progress line for the current day.
  mensaje <- paste0(dia, ": suma de horas trabajadas => ", trabajo)
  print(mensaje)
}
[1] "lunes: suma de horas trabajadas => 8"
[1] "martes: suma de horas trabajadas => 16"
[1] "miercoles: suma de horas trabajadas => 24"
[1] "jueves: suma de horas trabajadas => 32"
[1] "viernes: suma de horas trabajadas => 40"
# Replace every occurrence of "trabajadas" in mensaje with "programando".
gsub("trabajadas", "programando", mensaje)
[1] "viernes: suma de horas programando => 40"
# Replace every lowercase unaccented vowel in mensaje with an underscore.
gsub("[aeiou]", "_", mensaje)
[1] "v__rn_s: s_m_ d_ h_r_s tr_b_j_d_s => 40"