apply_list

data(ChickWeight)
?ChickWeight
## starting httpd help server ... done
sapply(ChickWeight, function(x) length(unique(x)))
## weight   Time  Chick   Diet 
##    212     12     50      4
tapply(ChickWeight$weight,ChickWeight$Diet,mean)
##        1        2        3        4 
## 102.6455 122.6167 142.9500 135.2627
my.splits = split(ChickWeight, ChickWeight$Diet)
length(my.splits)
## [1] 4
names(my.splits)
## [1] "1" "2" "3" "4"
head(my.splits[[1]])
##   weight Time Chick Diet
## 1     42    0     1    1
## 2     51    2     1    1
## 3     59    4     1    1
## 4     64    6     1    1
## 5     76    8     1    1
## 6     93   10     1    1
head(my.splits[[3]])
##     weight Time Chick Diet
## 341     42    0    31    3
## 342     53    2    31    3
## 343     62    4    31    3
## 344     73    6    31    3
## 345     85    8    31    3
## 346    102   10    31    3
my.results.lapply = lapply(my.splits, subset,
                           weight <= 40)
my.results.lapply
## $`1`
##     weight Time Chick Diet
## 13      40    0     2    1
## 26      39    2     3    1
## 195     39    0    18    1
## 196     35    2    18    1
## 
## $`2`
##     weight Time Chick Diet
## 221     40    0    21    2
## 269     40    0    25    2
## 293     39    0    27    2
## 305     39    0    28    2
## 317     39    0    29    2
## 
## $`3`
##     weight Time Chick Diet
## 365     39    0    33    3
## 401     39    0    36    3
## 
## $`4`
##     weight Time Chick Diet
## 519     40    0    46    4
## 543     39    0    48    4
## 555     40    0    49    4
my.results.lapply = lapply(my.splits, function(x) 
  subset(x, weight <= 40) )
my.results.lapply
## $`1`
##     weight Time Chick Diet
## 13      40    0     2    1
## 26      39    2     3    1
## 195     39    0    18    1
## 196     35    2    18    1
## 
## $`2`
##     weight Time Chick Diet
## 221     40    0    21    2
## 269     40    0    25    2
## 293     39    0    27    2
## 305     39    0    28    2
## 317     39    0    29    2
## 
## $`3`
##     weight Time Chick Diet
## 365     39    0    33    3
## 401     39    0    36    3
## 
## $`4`
##     weight Time Chick Diet
## 519     40    0    46    4
## 543     39    0    48    4
## 555     40    0    49    4

my.func <- function(x) {
  subset(x, weight <= 40)
}

my.results.lapply = lapply(my.splits, my.func)
my.results.lapply
## $`1`
##     weight Time Chick Diet
## 13      40    0     2    1
## 26      39    2     3    1
## 195     39    0    18    1
## 196     35    2    18    1
## 
## $`2`
##     weight Time Chick Diet
## 221     40    0    21    2
## 269     40    0    25    2
## 293     39    0    27    2
## 305     39    0    28    2
## 317     39    0    29    2
## 
## $`3`
##     weight Time Chick Diet
## 365     39    0    33    3
## 401     39    0    36    3
## 
## $`4`
##     weight Time Chick Diet
## 519     40    0    46    4
## 543     39    0    48    4
## 555     40    0    49    4
my.results.lapply = lapply(1:length(my.splits),
            function(x) subset(my.splits[[x]], 
                               weight <= 40))
my.results.lapply
## [[1]]
##     weight Time Chick Diet
## 13      40    0     2    1
## 26      39    2     3    1
## 195     39    0    18    1
## 196     35    2    18    1
## 
## [[2]]
##     weight Time Chick Diet
## 221     40    0    21    2
## 269     40    0    25    2
## 293     39    0    27    2
## 305     39    0    28    2
## 317     39    0    29    2
## 
## [[3]]
##     weight Time Chick Diet
## 365     39    0    33    3
## 401     39    0    36    3
## 
## [[4]]
##     weight Time Chick Diet
## 519     40    0    46    4
## 543     39    0    48    4
## 555     40    0    49    4
my.df = do.call(rbind, my.results.lapply)####!!!
my.df
##     weight Time Chick Diet
## 13      40    0     2    1
## 26      39    2     3    1
## 195     39    0    18    1
## 196     35    2    18    1
## 221     40    0    21    2
## 269     40    0    25    2
## 293     39    0    27    2
## 305     39    0    28    2
## 317     39    0    29    2
## 365     39    0    33    3
## 401     39    0    36    3
## 519     40    0    46    4
## 543     39    0    48    4
## 555     40    0    49    4
#####
my.results.for = list() 
for (ii in 1:length(my.splits)) {
  my.results.for[[ii]] = subset(my.splits[[ii]], 
                                weight <= 40)
}

names(my.results.for) = names(my.splits)

all.equal(my.results.lapply, my.results.for) # Should be equal to my.results.lapply
## [1] "names for current but not for target"
###
lapply(split(ChickWeight,ChickWeight$Diet), 
       subset, weight <= 40)
## $`1`
##     weight Time Chick Diet
## 13      40    0     2    1
## 26      39    2     3    1
## 195     39    0    18    1
## 196     35    2    18    1
## 
## $`2`
##     weight Time Chick Diet
## 221     40    0    21    2
## 269     40    0    25    2
## 293     39    0    27    2
## 305     39    0    28    2
## 317     39    0    29    2
## 
## $`3`
##     weight Time Chick Diet
## 365     39    0    33    3
## 401     39    0    36    3
## 
## $`4`
##     weight Time Chick Diet
## 519     40    0    46    4
## 543     39    0    48    4
## 555     40    0    49    4
# OR
(do.call(rbind, lapply(split(ChickWeight,
                             ChickWeight$Diet), 
                       subset, weight <= 40)))
##       weight Time Chick Diet
## 1.13      40    0     2    1
## 1.26      39    2     3    1
## 1.195     39    0    18    1
## 1.196     35    2    18    1
## 2.221     40    0    21    2
## 2.269     40    0    25    2
## 2.293     39    0    27    2
## 2.305     39    0    28    2
## 2.317     39    0    29    2
## 3.365     39    0    33    3
## 3.401     39    0    36    3
## 4.519     40    0    46    4
## 4.543     39    0    48    4
## 4.555     40    0    49    4
###!!!!
sorted.chickens = ChickWeight[order(ChickWeight$Diet),]
(sorted.chickens = subset(sorted.chickens, weight <= 40))
##     weight Time Chick Diet
## 13      40    0     2    1
## 26      39    2     3    1
## 195     39    0    18    1
## 196     35    2    18    1
## 221     40    0    21    2
## 269     40    0    25    2
## 293     39    0    27    2
## 305     39    0    28    2
## 317     39    0    29    2
## 365     39    0    33    3
## 401     39    0    36    3
## 519     40    0    46    4
## 543     39    0    48    4
## 555     40    0    49    4
####

tapply(ChickWeight$weight,ChickWeight$Chick,mean)
##        18        16        15        13         9        20        10 
##  37.00000  49.71429  60.12500  67.83333  81.16667  78.41667  83.08333 
##         8        17        19         4         6        11         3 
##  92.00000  92.50000  86.75000  99.33333 113.75000 129.91667 115.83333 
##         1        12         2         5        14         7        24 
## 111.66667 114.08333 119.91667 126.66667 151.33333 150.00000  66.25000 
##        30        22        23        27        28        26        25 
## 103.50000 104.25000 111.41667 110.41667 129.91667 131.00000 143.08333 
##        29        21        33        37        36        31        39 
## 141.83333 184.50000 109.75000 102.50000 134.91667 128.58333 134.25000 
##        38        32        40        34        35        44        45 
## 142.33333 157.58333 157.58333 168.83333 193.16667 102.10000 119.58333 
##        43        41        47        49        46        50        42 
## 143.00000 128.41667 127.91667 137.75000 134.08333 147.50000 149.08333 
##        48 
## 157.66667
#############################################
(somevec <- c(1,4,5,"4","5"))
## [1] "1" "4" "5" "4" "5"
# Let's do some regression using the mtcars data frame

mylm <- lm(mpg~wt, data = mtcars)
#str(mylm)
# What type of structure do we get back ? A list with 12 sub elements
str(mylm,max.level=0)
## List of 12
##  - attr(*, "class")= chr "lm"
##
surname <- "Jones"
numofchild <- 2
ages <- c(5,7)
measles <- c("Y","N")

family1 <- list(name="Jones",numofchild=2,ages=c(5,7),measles=c("Y","N"))
str(family1)
## List of 4
##  $ name      : chr "Jones"
##  $ numofchild: num 2
##  $ ages      : num [1:2] 5 7
##  $ measles   : chr [1:2] "Y" "N"
family1$ages
## [1] 5 7
family1$measles
## [1] "Y" "N"
family1[2]
## $numofchild
## [1] 2
family1[[2]]
## [1] 2
family1$ages[1]
## [1] 5
# We could pull out both ages using this approach
family1$ages[1:2]
## [1] 5 7
# But this is the same as this:
family1$ages
## [1] 5 7
# Which is the same as this:
family1[[3]]
## [1] 5 7
family1 <- list("Jones",2,c(5,7),c("Y","N"))
# So when we print the list results we see only brackets - no names.
family1
## [[1]]
## [1] "Jones"
## 
## [[2]]
## [1] 2
## 
## [[3]]
## [1] 5 7
## 
## [[4]]
## [1] "Y" "N"
names(family1) <- c("name","numofchild","ages","measles")
family1
## $name
## [1] "Jones"
## 
## $numofchild
## [1] 2
## 
## $ages
## [1] 5 7
## 
## $measles
## [1] "Y" "N"
#Introducing lapply
lapply(family1, typeof)
## $name
## [1] "character"
## 
## $numofchild
## [1] "double"
## 
## $ages
## [1] "double"
## 
## $measles
## [1] "character"
str(lapply(family1, typeof))
## List of 4
##  $ name      : chr "character"
##  $ numofchild: chr "double"
##  $ ages      : chr "double"
##  $ measles   : chr "character"
lapply(family1, function(x) typeof(x))
## $name
## [1] "character"
## 
## $numofchild
## [1] "double"
## 
## $ages
## [1] "double"
## 
## $measles
## [1] "character"
###
simplefunc <- function(x) {
  mytype <- typeof(x)
  return(mytype)
}
lapply(family1, simplefunc)
## $name
## [1] "character"
## 
## $numofchild
## [1] "double"
## 
## $ages
## [1] "double"
## 
## $measles
## [1] "character"
###
##
myfunc <- function(x) {
  if (is.numeric(x)) {
    mean(x)
  }
} 
# For numeric elements we get a meaningful result. For other elements we
# don't get back anything
lapply(family1, myfunc)
## $name
## NULL
## 
## $numofchild
## [1] 2
## 
## $ages
## [1] 6
## 
## $measles
## NULL
##

lapply(family1, function(x) {if (is.numeric(x)) 
  mean(x)})
## $name
## NULL
## 
## $numofchild
## [1] 2
## 
## $ages
## [1] 6
## 
## $measles
## NULL
#A list of lists !
family2 <- list(name="Espinoza",numofchild=4,ages=c(5,7,9,11),measles=c("Y","N","Y","Y"))
family3 <- list(name="Ginsberg",numofchild=3,ages=c(9,13,18),measles=c("Y","N","Y"))
family4 <- list(name="Souza",numofchild=5,
                ages=c(3,5,7,9,11),
                measles=c("N","Y","Y","Y","N"))

allfams <- list(f1=family1,f2=family2,f3=family3,
                f4=family4)

str(allfams,max.level=1)
## List of 4
##  $ f1:List of 4
##  $ f2:List of 4
##  $ f3:List of 4
##  $ f4:List of 4
allfams$f3$ages   # Get the ages of Family 3
## [1]  9 13 18
lapply(allfams, function(x) mean(x$ages))
## $f1
## [1] 6
## 
## $f2
## [1] 8
## 
## $f3
## [1] 13.33333
## 
## $f4
## [1] 7
unlist(lapply(allfams, function(x) mean(x$ages)))
##       f1       f2       f3       f4 
##  6.00000  8.00000 13.33333  7.00000
# So check out the following. It gives us exactly what we want.
mean(unlist(lapply(allfams, 
                   function(x) mean(x$ages))))
## [1] 8.583333
# An "expanded" version of this might have looked like:
mymeanages <- function(x) {
  return(mean(x$ages))
}
hold <- lapply(allfams, mymeanages)  # Get back a list with mean ages for each family
hold2 <- unlist(hold)    # Turn the result into a vector since everything is a numeric value
mean(hold2)   # Get the mean of all ages
## [1] 8.583333
hold <- lapply(allfams,function(x) {
  x$numofchild >= 2 & x$numofchild <= 3}) 
which(hold == T)
## f1 f3 
##  1  3
# Or we could it all in one go
which(lapply(allfams,function(x) {
  x$numofchild >= 2 & x$numofchild <= 3}) == T)
## f1 f3 
##  1  3
#Using the split command
unique(mtcars$cyl)  # Cylinder takes on three distinct values
## [1] 6 4 8
# We could split the data frame based on cylinder group.
mydfs <- split(mtcars,mtcars$cyl)
str(mydfs,max.level=1)###!!!
## List of 3
##  $ 4:'data.frame':   11 obs. of  11 variables:
##  $ 6:'data.frame':   7 obs. of  11 variables:
##  $ 8:'data.frame':   14 obs. of  11 variables:
fourcyl  <- mtcars[mtcars$cyl==4,]  
sixcyl   <- mtcars[mtcars$cyl==6,]
eightcyl <- mtcars[mtcars$cyl==8,]
names(mydfs)
## [1] "4" "6" "8"
mydfs$"4"
##                 mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Datsun 710     22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Merc 240D      24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230       22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Fiat 128       32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic    30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona  21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Fiat X1-9      27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2  26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa   30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Volvo 142E     21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
mydfs <- split(mtcars,mtcars$cyl)     
lapply(mydfs,function(x) mean(x$mpg))
## $`4`
## [1] 26.66364
## 
## $`6`
## [1] 19.74286
## 
## $`8`
## [1] 15.1
# Okay cool but we could bundle this up in one statement
lapply(split(mtcars,mtcars$cyl),function(x) 
  mean(x$mpg))
## $`4`
## [1] 26.66364
## 
## $`6`
## [1] 19.74286
## 
## $`8`
## [1] 15.1
# Or more economically (though potentially confusing to a newcomer) 

unlist(lapply(split(mtcars,mtcars$cyl),function(x) 
  mean(x$mpg)))
##        4        6        8 
## 26.66364 19.74286 15.10000
# Which is identical to the tapply function. 
tapply(mtcars$mpg,mtcars$cyl,mean)
##        4        6        8 
## 26.66364 19.74286 15.10000