# The R Book, by Michael Crawley
# Chapter 4
# 4.13 Merging two dataframes
# Directory holding the book's example data files. Defined once so every
# read below points at the same place (the original repeated the absolute
# path with inconsistent drive-letter case).
data_dir <- "C:\\Users\\Luis\\Documents\\therbook"
# NOTE(review): setwd() to a user-specific path makes the script
# non-portable. It is kept here only because code elsewhere in the file may
# rely on the working directory; nothing in this section needs it.
setwd(data_dir)

# Life form of each species, keyed by Genus + species.
formasdevida <- read.table(file.path(data_dir, "lifeforms.txt"),
                           header = TRUE)
formasdevida
##    Genus     species lifeform
## 1   Acer platanoides     tree
## 2   Acer    palmatum     tree
## 3  Ajuga     reptans     herb
## 4 Conyza sumatrensis   annual
## 5 Lamium       album     herb

# Flowering month of each species, keyed by Genus + species.
florece <- read.table(file.path(data_dir, "fltimes.txt"),
                      header = TRUE)
florece
##       Genus       species flowering
## 1      Acer   platanoides       May
## 2     Ajuga       reptans      June
## 3  Brassica         napus     April
## 4 Chamerion angustifolium      July
## 5    Conyza     bilbaoana    August
## 6    Lamium         album   January

# Both dataframes share the variable names Genus and species, so merge()
# would join on them by default. The keys are spelled out explicitly so the
# join does not silently change if another identically named column is ever
# added to either table. With the default all = FALSE only the rows present
# in both tables are kept:
merge(formasdevida, florece, by = c("Genus", "species"))
##    Genus     species lifeform flowering
## 1   Acer platanoides     tree       May
## 2  Ajuga     reptans     herb      June
## 3 Lamium       album     herb   January

# With all = TRUE every row from both tables is kept; values missing on
# either side are filled with NA. The outer parentheses print the result
# as well as assigning it.
(both <- merge(formasdevida, florece, by = c("Genus", "species"),
               all = TRUE))
##       Genus       species lifeform flowering
## 1      Acer      palmatum     tree      <NA>
## 2      Acer   platanoides     tree       May
## 3     Ajuga       reptans     herb      June
## 4    Conyza   sumatrensis   annual      <NA>
## 5    Conyza     bilbaoana     <NA>    August
## 6    Lamium         album     herb   January
## 7  Brassica         napus     <NA>     April
## 8 Chamerion angustifolium     <NA>      July

# Seed weights; here the key columns are called name1 / name2 instead of
# Genus / species.
semilla <- read.table(file.path(data_dir, "seedwts.txt"),
                      header = TRUE)
semilla
##       name1         name2 seed
## 1      Acer   platanoides 32.0
## 2    Lamium         album 12.0
## 3     Ajuga       reptans  4.0
## 4 Chamerion angustifolium  1.5
## 5    Conyza     bilbaoana  0.5
## 6  Brassica         napus  7.0
## 7      Acer      palmatum 21.0
## 8    Conyza   sumatrensis  0.6

# The key columns have different names in the two tables, so they are
# paired up with by.x (columns of `both`) and by.y (columns of `semilla`).
# The result keeps the by.x names.
merge(both, semilla,
      by.x = c("Genus", "species"),
      by.y = c("name1", "name2"))
##       Genus       species lifeform flowering seed
## 1      Acer      palmatum     tree      <NA> 21.0
## 2      Acer   platanoides     tree       May 32.0
## 3     Ajuga       reptans     herb      June  4.0
## 4  Brassica         napus     <NA>     April  7.0
## 5 Chamerion angustifolium     <NA>      July  1.5
## 6    Conyza     bilbaoana     <NA>    August  0.5
## 7    Conyza   sumatrensis   annual      <NA>  0.6
## 8    Lamium         album     herb   January 12.0
# 4.15 Summarising the contents of dataframes
worm <- read.table("C:\\Users\\Luis\\Documents\\therbook\\worms.txt",
                   header = TRUE, sep = "")

# Columns to average. They are selected by name rather than by position
# (the original used columns 2, 3, 5 and 7) so the summaries keep working
# if the column order of the file changes. The names are the ones shown in
# the recorded output below.
summary_cols <- c("Area", "Slope", "Soil.pH", "Worm.density")

# Mean of each selected column per vegetation type.
aggregate(worm[summary_cols], by = list(veg = worm$Vegetation), FUN = mean)
##         veg     Area    Slope  Soil.pH Worm.density
## 1    Arable 3.866667 1.333333 4.833333     5.333333
## 2 Grassland 2.911111 3.666667 4.100000     2.444444
## 3    Meadow 3.466667 1.666667 4.933333     6.333333
## 4   Orchard 1.900000 0.000000 5.700000     9.000000
## 5     Scrub 2.425000 7.000000 4.800000     5.250000

# The `by` argument must be a list even when, as above, there is only one
# classifying factor. Supplying two factors gives cross-classified
# summaries, here by Vegetation and Damp:
aggregate(worm[summary_cols],
          by = list(veg = worm$Vegetation, d = worm$Damp),
          FUN = mean)
##         veg     d     Area    Slope  Soil.pH Worm.density
## 1    Arable FALSE 3.866667 1.333333 4.833333     5.333333
## 2 Grassland FALSE 3.087500 3.625000 3.987500     1.875000
## 3   Orchard FALSE 1.900000 0.000000 5.700000     9.000000
## 4     Scrub FALSE 3.350000 5.000000 4.700000     7.000000
## 5 Grassland  TRUE 1.500000 4.000000 5.000000     7.000000
## 6    Meadow  TRUE 3.466667 1.666667 4.933333     6.333333
## 7     Scrub  TRUE 1.500000 9.000000 4.900000     3.500000