# The R Book, by Michael Crawley
# Chapter 4
# 4.13 Merging two dataframes
# Point the session at the folder that holds the book's data files.
# NOTE(review): this path is machine-specific, and the read.table() call
# below already passes an absolute path, so the setwd() may be
# unnecessary -- confirm before removing.
setwd(dir = "C:\\Users\\Luis\\Documents\\therbook")
# Load the life-form table (the first line of the file holds the column
# names) and display it.
formasdevida <- read.table(
  file = "c:\\Users\\Luis\\Documents\\therbook\\lifeforms.txt",
  header = TRUE
)
formasdevida
## Genus species lifeform
## 1 Acer platanoides tree
## 2 Acer palmatum tree
## 3 Ajuga reptans herb
## 4 Conyza sumatrensis annual
## 5 Lamium album herb
# Load the flowering-time table (the first line of the file holds the
# column names) and display it.
florece <- read.table(
  file = "c:\\Users\\Luis\\Documents\\therbook\\fltimes.txt",
  header = TRUE
)
florece
## Genus species flowering
## 1 Acer platanoides May
## 2 Ajuga reptans June
## 3 Brassica napus April
## 4 Chamerion angustifolium July
## 5 Conyza bilbaoana August
## 6 Lamium album January
# Because at least one variable name is identical in the two dataframes
# (two in this case: Genus and species), we can use the simplest form of
# the merge command. Only rows present in both dataframes are kept:
merge(x = formasdevida, y = florece)
## Genus species lifeform flowering
## 1 Acer platanoides tree May
## 2 Ajuga reptans herb June
## 3 Lamium album herb January
# Alternatively, all = TRUE keeps every row from both dataframes and fills
# the gaps with NA. The result is stored in `both` and then displayed:
both <- merge(x = formasdevida, y = florece, all = TRUE)
both
## Genus species lifeform flowering
## 1 Acer palmatum tree <NA>
## 2 Acer platanoides tree May
## 3 Ajuga reptans herb June
## 4 Conyza sumatrensis annual <NA>
## 5 Conyza bilbaoana <NA> August
## 6 Lamium album herb January
## 7 Brassica napus <NA> April
## 8 Chamerion angustifolium <NA> July
# Load the seed-weight table (the first line of the file holds the column
# names) and display it.
semilla <- read.table(
  file = "c:\\Users\\Luis\\Documents\\therbook\\seedwts.txt",
  header = TRUE
)
semilla
## name1 name2 seed
## 1 Acer platanoides 32.0
## 2 Lamium album 12.0
## 3 Ajuga reptans 4.0
## 4 Chamerion angustifolium 1.5
## 5 Conyza bilbaoana 0.5
## 6 Brassica napus 7.0
## 7 Acer palmatum 21.0
## 8 Conyza sumatrensis 0.6
# The key columns are named differently in the two dataframes, so the
# matching is spelled out: Genus/species in `both` against name1/name2 in
# `semilla`.
merge(
  x = both,
  y = semilla,
  by.x = c("Genus", "species"),
  by.y = c("name1", "name2")
)
## Genus species lifeform flowering seed
## 1 Acer palmatum tree <NA> 21.0
## 2 Acer platanoides tree May 32.0
## 3 Ajuga reptans herb June 4.0
## 4 Brassica napus <NA> April 7.0
## 5 Chamerion angustifolium <NA> July 1.5
## 6 Conyza bilbaoana <NA> August 0.5
## 7 Conyza sumatrensis annual <NA> 0.6
## 8 Lamium album herb January 12.0
# 4.15 Summarising the contents of dataframes
# Load the worms table (whitespace-separated, with a header line).
worm <- read.table(
  file = "C:\\Users\\Luis\\Documents\\therbook\\worms.txt",
  header = TRUE,
  sep = ""
)
# Mean of each numeric variable per vegetation type. The columns are
# selected by name rather than by position (2, 3, 5, 7) so the summary
# keeps working if the column order of the file ever changes.
aggregate(
  worm[c("Area", "Slope", "Soil.pH", "Worm.density")],
  by = list(veg = worm$Vegetation),
  FUN = mean
)
## veg Area Slope Soil.pH Worm.density
## 1 Arable 3.866667 1.333333 4.833333 5.333333
## 2 Grassland 2.911111 3.666667 4.100000 2.444444
## 3 Meadow 3.466667 1.666667 4.933333 6.333333
## 4 Orchard 1.900000 0.000000 5.700000 9.000000
## 5 Scrub 2.425000 7.000000 4.800000 5.250000
# The `by` argument must be a list even when, as in the previous call,
# there is only one classifying factor. Here are the cross-classified
# summaries by Vegetation and Damp. The columns are selected by name
# rather than by position (2, 3, 5, 7) so the summary keeps working if
# the column order of the file ever changes.
aggregate(
  worm[c("Area", "Slope", "Soil.pH", "Worm.density")],
  by = list(veg = worm$Vegetation, d = worm$Damp),
  FUN = mean
)
## veg d Area Slope Soil.pH Worm.density
## 1 Arable FALSE 3.866667 1.333333 4.833333 5.333333
## 2 Grassland FALSE 3.087500 3.625000 3.987500 1.875000
## 3 Orchard FALSE 1.900000 0.000000 5.700000 9.000000
## 4 Scrub FALSE 3.350000 5.000000 4.700000 7.000000
## 5 Grassland TRUE 1.500000 4.000000 5.000000 7.000000
## 6 Meadow TRUE 3.466667 1.666667 4.933333 6.333333
## 7 Scrub TRUE 1.500000 9.000000 4.900000 3.500000