Install or Load Libraries
Download the Data
apply() Functions
tapply() Functions
lapply()
sapply()
aggregate()
sweep()
# Load the built-in iris data set. To work from a file instead, replace `iris`
# with read.table(file = file.choose()).
# The workspace is deliberately not cleared: rm(list = ls()) would silently
# delete every object the reader already has in the session.
RawDataset <- iris
head(RawDataset)
# Keep only the four numeric measurement columns (Species is dropped).
NewData <- subset(
  RawDataset,
  select = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
)
# Prompt for the product CSV (its first row is treated as a header), then
# replace the column names with Product, Color and Price.
ProductDataSet <- read.csv(file.choose(), header = TRUE)
names(ProductDataSet) <- c("Product", "Color", "Price")
head(ProductDataSet)
Row-Level apply()
# MARGIN = 1 runs FUN once per row, giving one mean per observation.
RowApply <- apply(NewData, 1, mean)
head(RowApply)
1 2 3 4 5 6
2.550 2.375 2.350 2.350 2.550 2.850
Column-Level apply()
# MARGIN = 2 runs FUN once per column, giving one mean per variable.
ColumnApply <- apply(NewData, 2, mean)
print(ColumnApply)
Sepal.Length Sepal.Width Petal.Length Petal.Width
5.843333 3.057333 3.758000 1.199333
Using Custom Functions
# Same column means as before, but computed by an anonymous function to show
# that FUN can be any user-supplied function.
ColumnApply <- apply(NewData, 2, function(column) {
  sum(column) / length(column)
})
print(ColumnApply)
Sepal.Length Sepal.Width Petal.Length Petal.Width
5.843333 3.057333 3.758000 1.199333
Running by Specific Attribute Levels
# Mean sepal length within each level of Species.
tapply(X = iris$Sepal.Length, INDEX = iris$Species, FUN = mean)
setosa versicolor virginica
5.006 5.936 6.588
Two-Way Tables (Product by Color)
# Mean price for every Product/Color pair; combinations that never occur in
# the data come back as NA.
tapply(
  X = ProductDataSet$Price,
  INDEX = list(ProductDataSet$Product, ProductDataSet$Color),
  FUN = mean
)
Black Blue Multi Red White
AWC Logo Cap NA NA 8.759467 NA NA
HL Road Frame - Red, 44 NA NA NA 1332.1078 NA
HL Road Frame - Red, 48 NA NA NA 1332.1078 NA
HL Road Frame - Red, 52 NA NA NA 1332.1078 NA
HL Road Frame - Red, 56 NA NA NA 1332.1078 NA
HL Road Frame - Red, 62 NA NA NA 1332.1078 NA
LL Road Frame - Black, 58 313.8061 NA NA NA NA
LL Road Frame - Black, 60 313.8061 NA NA NA NA
LL Road Frame - Black, 62 313.8061 NA NA NA NA
Long-Sleeve Logo Jersey, L NA NA 48.708200 NA NA
Long-Sleeve Logo Jersey, M NA NA 48.708200 NA NA
Long-Sleeve Logo Jersey, S NA NA 48.708200 NA NA
Long-Sleeve Logo Jersey, XL NA NA 48.708200 NA NA
Mountain Bike Socks, L NA NA NA NA 9.5
Mountain Bike Socks, M NA NA NA NA 9.5
Sport-100 Helmet, Black 34.0928 NA NA NA NA
Sport-100 Helmet, Blue NA 34.0928 NA NA NA
Sport-100 Helmet, Red NA NA NA 34.0928 NA
lapply() returns a list of the same length as X, each element of which is the result of applying FUN to the corresponding element of X.
Create a New List
# A named list holding three differently typed vectors: integers, doubles and
# logicals.
x <- list(
  A = 1:2,
  Beta = exp(-3:3),
  Logic = c(TRUE, FALSE, FALSE, TRUE)
)
# Pull the integer element out on its own.
data <- x[["A"]]
Simple Multiplication of Each Element of A by 10
The output is a list(), but the unlist() function will transform the output into a vector.
# lapply() visits each element of x$A and always hands back a list.
new <- lapply(x$A, function(value) value * 10)
# Flatten the list so the original and scaled values sit side by side.
Report <- data.frame(Original = unlist(x$A), Transformed = unlist(new))
print(Report)
Simple Multiplication of Each Element of iris$Sepal.Length by 10
# sapply() simplifies its result, so scaling a numeric vector yields a plain
# numeric vector rather than a list.
data <- iris[["Sepal.Length"]]
new <- sapply(data, function(value) value * 10)
Report <- data.frame(Original = data, Transformed = new)
head(Report)
Calculate the Quantiles for a Single Variable (Sepal.Length)
# Default quantile() call: minimum, quartiles and maximum of one variable.
data <- iris[["Sepal.Length"]]
Report <- quantile(data)
print(Report)
0% 25% 50% 75% 100%
4.3 5.1 5.8 6.4 7.9
Calculate the Quantiles for Each Variable in the iris Dataset
# Each column yields five quantiles, which sapply() binds into a matrix with
# one column per variable.
data <- iris[1:4]
Report <- sapply(data, quantile)
print(Report)
Sepal.Length Sepal.Width Petal.Length Petal.Width
0% 4.3 2.0 1.00 0.1
25% 5.1 2.8 1.60 0.3
50% 5.8 3.0 4.35 1.3
75% 6.4 3.3 5.10 1.8
100% 7.9 4.4 6.90 2.5
Calculate a specific Quantile (.75) for Each Variable in the iris Dataset
# Extra arguments after FUN are forwarded to it, so probs = 0.75 reaches
# quantile() and a single value comes back per column.
data <- iris[1:4]
Report <- sapply(data, quantile, probs = 0.75)
print(Report)
Sepal.Length.75% Sepal.Width.75% Petal.Length.75% Petal.Width.75%
6.4 3.3 5.1 1.8
Using aggregate() with vectors
# Vector interface: the values to summarise and the grouping list are passed
# separately, and the result is already a data frame.
data <- subset(iris, select = c("Sepal.Length", "Species"))
Report <- aggregate(data$Sepal.Length, by = list(data$Species), FUN = "mean")
names(Report) <- c("Species", "Mean")
print(Report)
Using aggregate() through formula objects
# Average price for every Product/Color combination present in the data.
# The formula's variables are looked up in `data`, so the data frame that
# actually holds Price, Product and Color is passed here (not iris).
data <- ProductDataSet
Report <- aggregate(Price ~ Product + Color, data = data, FUN = mean)
colnames(Report) <- c("Product", "Color", "Average Price")
head(Report)
Compute the mean of all variables by factors
# `. ~ Species` means "every other column, grouped by Species", so this returns
# the per-species mean of all four measurements. The data frame is passed
# through `data` directly; no with() wrapper is needed.
Data <- iris
Report <- aggregate(. ~ Species, data = Data, FUN = mean)
Report
# A 6 x 6 matrix of zeros.
data <- matrix(0, nrow = 6, ncol = 6)
print(data)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0 0 0 0 0
[2,] 0 0 0 0 0 0
[3,] 0 0 0 0 0 0
[4,] 0 0 0 0 0 0
[5,] 0 0 0 0 0 0
[6,] 0 0 0 0 0 0
# sweep() combines STATS with the matrix along MARGIN using FUN; the single
# value 1 is recycled, so every cell is increased by one.
Report <- sweep(data, MARGIN = 1, STATS = 1, FUN = "+")
print(Report)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 1 1 1 1 1
[2,] 1 1 1 1 1 1
[3,] 1 1 1 1 1 1
[4,] 1 1 1 1 1 1
[5,] 1 1 1 1 1 1
[6,] 1 1 1 1 1 1