my.func <- function(x) {
subset(x, weight <= 40)
}
my.results.lapply = lapply(my.splits, my.func)
my.results.lapply
## $`1`
## weight Time Chick Diet
## 13 40 0 2 1
## 26 39 2 3 1
## 195 39 0 18 1
## 196 35 2 18 1
##
## $`2`
## weight Time Chick Diet
## 221 40 0 21 2
## 269 40 0 25 2
## 293 39 0 27 2
## 305 39 0 28 2
## 317 39 0 29 2
##
## $`3`
## weight Time Chick Diet
## 365 39 0 33 3
## 401 39 0 36 3
##
## $`4`
## weight Time Chick Diet
## 519 40 0 46 4
## 543 39 0 48 4
## 555 40 0 49 4
my.results.lapply = lapply(1:length(my.splits),
function(x) subset(my.splits[[x]],
weight <= 40))
my.results.lapply
## [[1]]
## weight Time Chick Diet
## 13 40 0 2 1
## 26 39 2 3 1
## 195 39 0 18 1
## 196 35 2 18 1
##
## [[2]]
## weight Time Chick Diet
## 221 40 0 21 2
## 269 40 0 25 2
## 293 39 0 27 2
## 305 39 0 28 2
## 317 39 0 29 2
##
## [[3]]
## weight Time Chick Diet
## 365 39 0 33 3
## 401 39 0 36 3
##
## [[4]]
## weight Time Chick Diet
## 519 40 0 46 4
## 543 39 0 48 4
## 555 40 0 49 4
my.df = do.call(rbind, my.results.lapply)####!!!
my.df
## weight Time Chick Diet
## 13 40 0 2 1
## 26 39 2 3 1
## 195 39 0 18 1
## 196 35 2 18 1
## 221 40 0 21 2
## 269 40 0 25 2
## 293 39 0 27 2
## 305 39 0 28 2
## 317 39 0 29 2
## 365 39 0 33 3
## 401 39 0 36 3
## 519 40 0 46 4
## 543 39 0 48 4
## 555 40 0 49 4
#####
my.results.for = list()
for (ii in 1:length(my.splits)) {
my.results.for[[ii]] = subset(my.splits[[ii]],
weight <= 40)
}
names(my.results.for) = names(my.splits)
all.equal(my.results.lapply, my.results.for) # Should be equal to my.results.lapply
## [1] "names for current but not for target"
###
lapply(split(ChickWeight,ChickWeight$Diet),
subset, weight <= 40)
## $`1`
## weight Time Chick Diet
## 13 40 0 2 1
## 26 39 2 3 1
## 195 39 0 18 1
## 196 35 2 18 1
##
## $`2`
## weight Time Chick Diet
## 221 40 0 21 2
## 269 40 0 25 2
## 293 39 0 27 2
## 305 39 0 28 2
## 317 39 0 29 2
##
## $`3`
## weight Time Chick Diet
## 365 39 0 33 3
## 401 39 0 36 3
##
## $`4`
## weight Time Chick Diet
## 519 40 0 46 4
## 543 39 0 48 4
## 555 40 0 49 4
# OR
(do.call(rbind, lapply(split(ChickWeight,
ChickWeight$Diet),
subset, weight <= 40)))
## weight Time Chick Diet
## 1.13 40 0 2 1
## 1.26 39 2 3 1
## 1.195 39 0 18 1
## 1.196 35 2 18 1
## 2.221 40 0 21 2
## 2.269 40 0 25 2
## 2.293 39 0 27 2
## 2.305 39 0 28 2
## 2.317 39 0 29 2
## 3.365 39 0 33 3
## 3.401 39 0 36 3
## 4.519 40 0 46 4
## 4.543 39 0 48 4
## 4.555 40 0 49 4
###!!!!
sorted.chickens = ChickWeight[order(ChickWeight$Diet),]
(sorted.chickens = subset(sorted.chickens, weight <= 40))
## weight Time Chick Diet
## 13 40 0 2 1
## 26 39 2 3 1
## 195 39 0 18 1
## 196 35 2 18 1
## 221 40 0 21 2
## 269 40 0 25 2
## 293 39 0 27 2
## 305 39 0 28 2
## 317 39 0 29 2
## 365 39 0 33 3
## 401 39 0 36 3
## 519 40 0 46 4
## 543 39 0 48 4
## 555 40 0 49 4
####
tapply(ChickWeight$weight,ChickWeight$Chick,mean)
## 18 16 15 13 9 20 10
## 37.00000 49.71429 60.12500 67.83333 81.16667 78.41667 83.08333
## 8 17 19 4 6 11 3
## 92.00000 92.50000 86.75000 99.33333 113.75000 129.91667 115.83333
## 1 12 2 5 14 7 24
## 111.66667 114.08333 119.91667 126.66667 151.33333 150.00000 66.25000
## 30 22 23 27 28 26 25
## 103.50000 104.25000 111.41667 110.41667 129.91667 131.00000 143.08333
## 29 21 33 37 36 31 39
## 141.83333 184.50000 109.75000 102.50000 134.91667 128.58333 134.25000
## 38 32 40 34 35 44 45
## 142.33333 157.58333 157.58333 168.83333 193.16667 102.10000 119.58333
## 43 41 47 49 46 50 42
## 143.00000 128.41667 127.91667 137.75000 134.08333 147.50000 149.08333
## 48
## 157.66667
#############################################
(somevec <- c(1,4,5,"4","5"))
## [1] "1" "4" "5" "4" "5"
# Let's do some regression using the mtcars data frame
mylm <- lm(mpg~wt, data = mtcars)
#str(mylm)
# What type of structure do we get back ? A list with 12 sub elements
str(mylm,max.level=0)
## List of 12
## - attr(*, "class")= chr "lm"
##
surname <- "Jones"
numofchild <- 2
ages <- c(5,7)
measles <- c("Y","N")
family1 <- list(name="Jones",numofchild=2,ages=c(5,7),measles=c("Y","N"))
str(family1)
## List of 4
## $ name : chr "Jones"
## $ numofchild: num 2
## $ ages : num [1:2] 5 7
## $ measles : chr [1:2] "Y" "N"
family1$ages
## [1] 5 7
family1$measles
## [1] "Y" "N"
family1[2]
## $numofchild
## [1] 2
family1[[2]]
## [1] 2
family1$ages[1]
## [1] 5
# We could pull out both ages using this approach
family1$ages[1:2]
## [1] 5 7
# But this is the same as this:
family1$ages
## [1] 5 7
# Which is the same as this:
family1[[3]]
## [1] 5 7
family1 <- list("Jones",2,c(5,7),c("Y","N"))
# So when we print the list results we see only brackets - no names.
family1
## [[1]]
## [1] "Jones"
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 5 7
##
## [[4]]
## [1] "Y" "N"
names(family1) <- c("name","numofchild","ages","measles")
family1
## $name
## [1] "Jones"
##
## $numofchild
## [1] 2
##
## $ages
## [1] 5 7
##
## $measles
## [1] "Y" "N"
#Introducing lapply
lapply(family1, typeof)
## $name
## [1] "character"
##
## $numofchild
## [1] "double"
##
## $ages
## [1] "double"
##
## $measles
## [1] "character"
str(lapply(family1, typeof))
## List of 4
## $ name : chr "character"
## $ numofchild: chr "double"
## $ ages : chr "double"
## $ measles : chr "character"
lapply(family1, function(x) typeof(x))
## $name
## [1] "character"
##
## $numofchild
## [1] "double"
##
## $ages
## [1] "double"
##
## $measles
## [1] "character"
###
simplefunc <- function(x) {
mytype <- typeof(x)
return(mytype)
}
lapply(family1, simplefunc)
## $name
## [1] "character"
##
## $numofchild
## [1] "double"
##
## $ages
## [1] "double"
##
## $measles
## [1] "character"
###
##
myfunc <- function(x) {
if (is.numeric(x)) {
mean(x)
}
}
# For numeric elements we get a meaningful result. For other elements we
# don't get back anything
lapply(family1, myfunc)
## $name
## NULL
##
## $numofchild
## [1] 2
##
## $ages
## [1] 6
##
## $measles
## NULL
##
lapply(family1, function(x) {if (is.numeric(x))
mean(x)})
## $name
## NULL
##
## $numofchild
## [1] 2
##
## $ages
## [1] 6
##
## $measles
## NULL
#A list of lists !
family2 <- list(name="Espinoza",numofchild=4,ages=c(5,7,9,11),measles=c("Y","N","Y","Y"))
family3 <- list(name="Ginsberg",numofchild=3,ages=c(9,13,18),measles=c("Y","N","Y"))
family4 <- list(name="Souza",numofchild=5,
ages=c(3,5,7,9,11),
measles=c("N","Y","Y","Y","N"))
allfams <- list(f1=family1,f2=family2,f3=family3,
f4=family4)
str(allfams,max.level=1)
## List of 4
## $ f1:List of 4
## $ f2:List of 4
## $ f3:List of 4
## $ f4:List of 4
allfams$f3$ages # Get the ages of Family 3
## [1] 9 13 18
lapply(allfams, function(x) mean(x$ages))
## $f1
## [1] 6
##
## $f2
## [1] 8
##
## $f3
## [1] 13.33333
##
## $f4
## [1] 7
unlist(lapply(allfams, function(x) mean(x$ages)))
## f1 f2 f3 f4
## 6.00000 8.00000 13.33333 7.00000
# So check out the following. It gives us exactly what we want.
mean(unlist(lapply(allfams,
function(x) mean(x$ages))))
## [1] 8.583333
# An "expanded" version of this might have looked like:
mymeanages <- function(x) {
return(mean(x$ages))
}
hold <- lapply(allfams, mymeanages) # Get back a list with mean ages for each family
hold2 <- unlist(hold) # Turn the result into a vector since everything is a numeric value
mean(hold2) # Get the mean of all ages
## [1] 8.583333
hold <- lapply(allfams,function(x) {
x$numofchild >= 2 & x$numofchild <= 3})
which(hold == T)
## f1 f3
## 1 3
# Or we could it all in one go
which(lapply(allfams,function(x) {
x$numofchild >= 2 & x$numofchild <= 3}) == T)
## f1 f3
## 1 3
#Using the split command
unique(mtcars$cyl) # Cylinder takes on three distinct values
## [1] 6 4 8
# We could split the data frame based on cylinder group.
mydfs <- split(mtcars,mtcars$cyl)
str(mydfs,max.level=1)###!!!
## List of 3
## $ 4:'data.frame': 11 obs. of 11 variables:
## $ 6:'data.frame': 7 obs. of 11 variables:
## $ 8:'data.frame': 14 obs. of 11 variables:
fourcyl <- mtcars[mtcars$cyl==4,]
sixcyl <- mtcars[mtcars$cyl==6,]
eightcyl <- mtcars[mtcars$cyl==8,]
names(mydfs)
## [1] "4" "6" "8"
mydfs$"4"
## mpg cyl disp hp drat wt qsec vs am gear carb
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
mydfs <- split(mtcars,mtcars$cyl)
lapply(mydfs,function(x) mean(x$mpg))
## $`4`
## [1] 26.66364
##
## $`6`
## [1] 19.74286
##
## $`8`
## [1] 15.1
# Okay cool but we could bundle this up in one statement
lapply(split(mtcars,mtcars$cyl),function(x)
mean(x$mpg))
## $`4`
## [1] 26.66364
##
## $`6`
## [1] 19.74286
##
## $`8`
## [1] 15.1
# Or more economically (though potentially confusing to a newcomer)
unlist(lapply(split(mtcars,mtcars$cyl),function(x)
mean(x$mpg)))
## 4 6 8
## 26.66364 19.74286 15.10000
# Which is identical to the tapply function.
tapply(mtcars$mpg,mtcars$cyl,mean)
## 4 6 8
## 26.66364 19.74286 15.10000