rwd <- "/media/alobo/USB2/RBiotitas"
datadir <- "/media/alobo/USB2/RBiotitas" #modify for yours
datadirout <- "/media/alobo/USB2/RBiotitas" #modify for yours
#dir.create(dataout)
require(scales)
require(robCompositions)
require(reshape2)
require(ggplot2)
ori <- read.csv(file.path(datadir,"Biotite_CB.csv"),
skip=0,header=TRUE,stringsAsFactors=FALSE)
dim(ori)
## [1] 9 283
#ori[1:2,]
ID <- names(ori[-1])
compound <- ori[,1]
CBbiot <- data.frame(t(ori[,-1]))
names(CBbiot) <- compound
head(CBbiot,3)
## SiO2 TiO2 Al2O3 FeO MnO MgO CaO Na2O K2O
## CB0102.D01 37.15 3.68 13.37 16.36 1.110 13.98 0.01 0.4600 8.98
## CB0102.D01b 38.14 3.53 13.10 15.94 1.160 13.84 0.04 1.1800 8.93
## CB0301.C05 37.94 3.47 12.83 16.83 1.073 13.48 0.00 0.4794 8.70
CBbiot <- cbind(ID=ID,CBbiot)
head(CBbiot,3)
## ID SiO2 TiO2 Al2O3 FeO MnO MgO CaO Na2O
## CB0102.D01 CB0102.D01 37.15 3.68 13.37 16.36 1.110 13.98 0.01 0.4600
## CB0102.D01b CB0102.D01b 38.14 3.53 13.10 15.94 1.160 13.84 0.04 1.1800
## CB0301.C05 CB0301.C05 37.94 3.47 12.83 16.83 1.073 13.48 0.00 0.4794
## K2O
## CB0102.D01 8.98
## CB0102.D01b 8.93
## CB0301.C05 8.70
summary(CBbiot)
## ID SiO2 TiO2 Al2O3
## CARRERAS1.A06: 1 Min. :30.4 Min. :0.30 Min. :10.7
## CARRERAS1.A07: 1 1st Qu.:36.2 1st Qu.:3.53 1st Qu.:13.3
## CARRERAS1.C06: 1 Median :37.0 Median :4.12 Median :13.6
## CB0102.D01 : 1 Mean :36.9 Mean :4.04 Mean :14.0
## CB0102.D01b : 1 3rd Qu.:37.8 3rd Qu.:4.87 3rd Qu.:14.0
## CB0301.C05 : 1 Max. :43.5 Max. :5.65 Max. :23.6
## (Other) :276
## FeO MnO MgO CaO
## Min. : 5.05 Min. :0.040 Min. : 2.37 Min. :0.000
## 1st Qu.:16.10 1st Qu.:0.240 1st Qu.:13.18 1st Qu.:0.000
## Median :16.76 Median :0.337 Median :13.69 Median :0.000
## Mean :16.70 Mean :0.491 Mean :13.40 Mean :0.181
## 3rd Qu.:17.27 3rd Qu.:0.580 3rd Qu.:14.18 3rd Qu.:0.040
## Max. :27.65 Max. :3.860 Max. :17.60 Max. :9.180
##
## Na2O K2O
## Min. :0.000 Min. :0.06
## 1st Qu.:0.381 1st Qu.:7.82
## Median :0.458 Median :8.63
## Mean :0.506 Mean :7.98
## 3rd Qu.:0.550 3rd Qu.:8.88
## Max. :3.500 Max. :9.52
##
We substitute 0 by 0.0001 to avoid problems with transforms
a <- CBbiot[,-1]
a[a<0.0001] <- 0.0001
CBbiot[,-1] <- a
head(CBbiot,3)
## ID SiO2 TiO2 Al2O3 FeO MnO MgO CaO Na2O
## CB0102.D01 CB0102.D01 37.15 3.68 13.37 16.36 1.110 13.98 1e-02 0.4600
## CB0102.D01b CB0102.D01b 38.14 3.53 13.10 15.94 1.160 13.84 4e-02 1.1800
## CB0301.C05 CB0301.C05 37.94 3.47 12.83 16.83 1.073 13.48 1e-04 0.4794
## K2O
## CB0102.D01 8.98
## CB0102.D01b 8.93
## CB0301.C05 8.70
summary(CBbiot)
## ID SiO2 TiO2 Al2O3
## CARRERAS1.A06: 1 Min. :30.4 Min. :0.30 Min. :10.7
## CARRERAS1.A07: 1 1st Qu.:36.2 1st Qu.:3.53 1st Qu.:13.3
## CARRERAS1.C06: 1 Median :37.0 Median :4.12 Median :13.6
## CB0102.D01 : 1 Mean :36.9 Mean :4.04 Mean :14.0
## CB0102.D01b : 1 3rd Qu.:37.8 3rd Qu.:4.87 3rd Qu.:14.0
## CB0301.C05 : 1 Max. :43.5 Max. :5.65 Max. :23.6
## (Other) :276
## FeO MnO MgO CaO
## Min. : 5.05 Min. :0.040 Min. : 2.37 Min. :0.000
## 1st Qu.:16.10 1st Qu.:0.240 1st Qu.:13.18 1st Qu.:0.000
## Median :16.76 Median :0.337 Median :13.69 Median :0.000
## Mean :16.70 Mean :0.491 Mean :13.40 Mean :0.181
## 3rd Qu.:17.27 3rd Qu.:0.580 3rd Qu.:14.18 3rd Qu.:0.040
## Max. :27.65 Max. :3.860 Max. :17.60 Max. :9.180
##
## Na2O K2O
## Min. :0.000 Min. :0.06
## 1st Qu.:0.381 1st Qu.:7.82
## Median :0.458 Median :8.63
## Mean :0.506 Mean :7.98
## 3rd Qu.:0.550 3rd Qu.:8.88
## Max. :3.500 Max. :9.52
##
CBbiotlog <- CBbiot
CBbiotlog[,-1] <- log10(CBbiot[,-1])
Compositional data:
hist(apply(CBbiot[,-1],1,sum))
thus we have to apply transforms
#### 1.2.1 Centered log-ratio
CBbiotlr <- cenLR(CBbiot[,-1])$x.clr
CBbiotlr <- cbind(ID=CBbiot[,1],CBbiotlr)
save(CBbiotlr,file="CBbiotlr.rda")
head(CBbiotlr,3)
## ID SiO2 TiO2 Al2O3 FeO MnO MgO CaO
## CB0102.D01 CB0102.D01 2.519 0.20724 1.497 1.699 -0.9913 1.542 -5.701
## CB0102.D01b CB0102.D01b 2.291 -0.08937 1.222 1.418 -1.2023 1.277 -4.570
## CB0301.C05 CB0301.C05 3.064 0.67250 1.980 2.252 -0.5008 2.030 -9.782
## Na2O K2O
## CB0102.D01 -1.872 1.0993
## CB0102.D01b -1.185 0.8387
## CB0301.C05 -1.307 1.5917
CBbiotiso <- data.frame(isomLR(CBbiot[,-1]))
CBbiotiso <- cbind(ID=CBbiot[,1],CBbiotiso)
head(CBbiotiso,3)
## ID X1 X2 X3 X4 X5 X6 X7 X8
## 1 CB0102.D01 -2.672 -0.5582 -2.038 -2.633 -0.2161 -3.204 4.339 2.101
## 2 CB0102.D01b -2.430 -0.2105 -1.660 -2.178 0.2616 -2.525 3.590 1.431
## 3 CB0301.C05 -3.250 -1.1284 -2.715 -3.510 -1.2219 -4.499 8.103 2.050
save(CBbiotiso,file="CBbiotiso.rda")
#Note Isometric transform creates newer variables (nb. of originals -1)
A priori Eruption events?
CBbiotm <- melt(CBbiot,id.vars=1, variable.name="Compund")
#CBbiotm[1:3,]
CBbiotm[,4] <- log10(CBbiotm[,3])
dim(CBbiotm)
## [1] 2538 4
delme <- melt(CBbiotlr,id.vars=1)
CBbiotm[,5] <- delme[,3]
names(CBbiotm)[3:5] <- c("Percent","LogPercent","LogRatio")
head(CBbiotm,3)
## ID Compund Percent LogPercent LogRatio
## 1 CB0102.D01 SiO2 37.15 1.570 2.519
## 2 CB0102.D01b SiO2 38.14 1.581 2.291
## 3 CB0301.C05 SiO2 37.94 1.579 3.064
rm(delme)
save(CBbiotm,file="CBbiotm.rda")
bxall <- ggplot(data=CBbiotm) +
geom_boxplot(aes(Compund,Percent),notch=FALSE)
bxall + ggtitle("Composition of Biotites in CB volcanic ash")
bxall + scale_y_continuous(trans = "log") +
ggtitle("Composition of Biotites in CB volcanic ash (log scale)")
bxallLR <- ggplot(data=CBbiotm) +
geom_boxplot(aes(Compund,LogRatio),notch=FALSE)
bxallLR + ggtitle("Composition of Biotites in CB volcanic ash (Log Ratio transform)")
hall <- ggplot(data=CBbiotm) +
geom_histogram(aes(Percent),binwidth=1, colour="black", fill="white") +
facet_grid(Compund~.)
hall + ggtitle("Composition of Biotites in CB volcanic ash")
hall + scale_y_continuous(trans = "log") +
ggtitle("Composition of Biotites in CB volcanic ash (log scale)")
hallLR <- ggplot(data=CBbiotm) +
geom_histogram(aes(LogRatio),binwidth=0.1, colour="black", fill="white") +
facet_grid(Compund~.)
hallLR + ggtitle("Composition of Biotites in CB volcanic ash (Log Ratio transform)")
pairs(CBbiot[,-1],pch=20, col=alpha(1,alpha=0.2),main="Composition of Biotites in CB volcanic ash")
pairs(CBbiotlog[,-1],pch=20, col=alpha(1,alpha=0.2),main="Composition of Biotites in CB volcanic ash (Log scale)")
pairs(CBbiotlr[,-1],pch=20, col=alpha(1,alpha=0.2),main="Composition of Biotites in CB volcanic ash (Log Ratio transform)")
standardize (or scale)
CBbiotS <- CBbiotlogS <- CBbiotlrS <- CBbiot
CBbiotisoS <- CBbiot[,-2] #1 var less
CBbiotisoS <- CBbiotCBbiotS[,-1] <- scale(CBbiot[,-1])
## Error: object 'CBbiotCBbiotS' not found
CBbiotlogS[,-1] <- scale(CBbiotlog[,-1])
CBbiotlrS[,-1] <- scale(CBbiotlr[,-1])
CBbiotisoS[,-1] <- scale(CBbiotiso[,-1])
CBbiotSpca <- prcomp(CBbiotS[,-1])
CBbiotlogSpca <- prcomp(CBbiotlogS[,-1])
CBbiotlrSpca <- prcomp(CBbiotlrS[,-1])
CBbiotisoSpca <- prcomp(CBbiotisoS[,-1])
plot(100*cumsum(CBbiotSpca$sdev/sum(CBbiotSpca$sdev)),col="black",type="b",
xlab="PC",ylab="%var")
points(100*cumsum(CBbiotlogSpca$sdev/sum(CBbiotlogSpca$sdev)),col="red",type="b")
points(100*cumsum(CBbiotlrSpca$sdev/sum(CBbiotlrSpca$sdev)),col="blue",type="b")
points(100*cumsum(CBbiotisoSpca$sdev/sum(CBbiotisoSpca$sdev)),col="cyan",type="b")
The above plot indicates that Log-Ratio and isometric transformations unveil more structure
par(mfrow=c(2,2),pty="s")
plot(CBbiotSpca$x[,1:2], type="n", main="Proportions",asp=1)
text(CBbiotSpca$x[,1:2], labels=1:nrow(CBbiotS), cex=0.5)
plot(CBbiotlogSpca$x[,1:2], type="n", main="Log transform",asp=1)
text(CBbiotlogSpca$x[,1:2], labels=1:nrow(CBbiotlogS), cex=0.5)
plot(CBbiotlrSpca$x[,1:2], type="n", main="Log Ratio",asp=1)
text(CBbiotlrSpca$x[,1:2], labels=1:nrow(CBbiotlrS), cex=0.5)
plot(CBbiotisoSpca$x[,1:2], type="n", main="Isometric Ratio",asp=1)
text(CBbiotisoSpca$x[,1:2], labels=1:nrow(CBbiotisoS), cex=0.5)
par(mfrow=c(2,2),pty="s")
plot(CBbiotlrSpca$x[,1:2], type="n", main="Log Ratio",asp=1)
text(CBbiotlrSpca$x[,1:2], labels=1:nrow(CBbiotlrS), cex=0.5)
plot(CBbiotlrSpca$x[,2:3], type="n", main="Log Ratio",asp=1)
text(CBbiotlrSpca$x[,2:3], labels=1:nrow(CBbiotlrS), cex=0.5)
plot(CBbiotlrSpca$x[,3:4], type="n", main="Log Ratio",asp=1)
text(CBbiotlrSpca$x[,3:4], labels=1:nrow(CBbiotlrS), cex=0.5)
par(mfrow=c(2,2),pty="s")
plot(CBbiotisoSpca$x[,1:2], type="n", main="Log Ratio",asp=1)
text(CBbiotisoSpca$x[,1:2], labels=1:nrow(CBbiotisoS), cex=0.5)
plot(CBbiotisoSpca$x[,2:3], type="n", main="Log Ratio,asp=1")
text(CBbiotisoSpca$x[,2:3], labels=1:nrow(CBbiotisoS), cex=0.5)
plot(CBbiotisoSpca$x[,3:4], type="n", main="Log Ratio",asp=1)
text(CBbiotisoSpca$x[,3:4], labels=1:nrow(CBbiotisoS), cex=0.5)
Some observations could be Inf or NA after transformation. We check:
any(is.infinite(data.matrix(CBbiot[,-1])))
## [1] FALSE
any(is.infinite(data.matrix(CBbiotlog[,-1])))
## [1] FALSE
any(is.infinite(data.matrix(CBbiotlr[,-1])))
## [1] FALSE
any(is.infinite(data.matrix(CBbiotiso[,-1])))
## [1] FALSE
Average method
CBbiotShca <- hclust(dist(CBbiotS[,-1],method="euclidean"), method="average")
plot(CBbiotShca, cex=0.5)
CBbiotlogShca <- hclust(dist(CBbiotlogS[,-1],method="euclidean"), method="average")
plot(CBbiotlogShca, cex=0.5)
CBbiotlrShca <- hclust(dist(CBbiotlrS[,-1],method="euclidean"), method="average")
plot(CBbiotlrShca, cex=0.25)
CBbiotisoShca <- hclust(dist(CBbiotisoS[,-1],method="euclidean"), method="average")
plot(CBbiotisoShca, cex=0.25)
Ward’s (minimum variance) method
CBbiotShcw <- hclust(dist(CBbiotS[,-1],method="euclidean"), method="ward")
plot(CBbiotShcw, cex=0.5)
CBbiotlogShcw <- hclust(dist(CBbiotlogS[,-1],method="euclidean"), method="ward")
plot(CBbiotlogShcw, cex=0.5)
CBbiotlrShcw <- hclust(dist(CBbiotlrS[,-1],method="euclidean"), method="ward")
plot(CBbiotlrShcw, cex=0.25)
CBbiotisoShcw <- hclust(dist(CBbiotisoS[,-1],method="euclidean"), method="ward")
plot(CBbiotisoShcw, cex=0.25)
CBbiotlogShcw8 <- cutree(CBbiotlogShcw,k=8)
CBbiotlrShcw7 <- cutree(CBbiotlrShcw,k=7)
CBbiotisoShcw7 <- cutree(CBbiotisoShcw,k=7)
Classifications resulting from Log-Ratio and Isometric transforms are almost identical:
table(CBbiotlrShcw7,CBbiotisoShcw7)
## CBbiotisoShcw7
## CBbiotlrShcw7 1 2 3 4 5 6 7
## 1 24 0 12 0 0 0 0
## 2 0 32 0 0 0 0 0
## 3 0 1 0 112 0 0 0
## 4 0 0 33 0 1 0 0
## 5 0 0 0 0 38 0 0
## 6 0 0 0 0 0 21 0
## 7 0 0 0 0 0 0 8
Classification resulting from Log-transform is also very similar to those resulting from Log-Ratio and Isometric transforms
table(CBbiotlrShcw7,CBbiotlogShcw7)
## Error: object 'CBbiotlogShcw7' not found
par(mfrow=c(1,2),pty="s")
plot(CBbiotlogSpca$x[,1:2], type="n", main="Log-transformed data",asp=1)
text(CBbiotlogSpca$x[,1:2], labels=1:nrow(CBbiotlogS), col=CBbiotlogShcw8, cex=0.5)
legend(x=9,y=8,legend=1:8, pch=19,col=1:8,cex=0.7)
plot(CBbiotlogSpca$x[,2:3], type="n", main="Log-transformed data",asp=1)
text(CBbiotlogSpca$x[,2:3], labels=1:nrow(CBbiotlogS), col=CBbiotlogShcw8, cex=0.5)
legend(x=8,y=5,legend=1:8, pch=19,col=1:7,cex=0.7)
par(mfrow=c(1,2),pty="s")
plot(CBbiotlrSpca$x[,1:2], type="n", main="Log Ratio transformed data",asp=1)
text(CBbiotlrSpca$x[,1:2], labels=1:nrow(CBbiotlrS), col=CBbiotlrShcw7, cex=0.5)
legend(x=5,y=7,legend=1:7, pch=19,col=1:7,cex=0.7)
plot(CBbiotlrSpca$x[,2:3], type="n", main="Log Ratio transformed data",asp=1)
text(CBbiotlrSpca$x[,2:3], labels=1:nrow(CBbiotlrS), col=CBbiotlrShcw7, cex=0.5)
legend(x=7,y=2.5,legend=1:7, pch=19,col=1:7,cex=0.7)
plot(CBbiotlrSpca$x[,1:2], type="n", main="Log Ratio transformed data")
text(CBbiotlrSpca$x[,1:2], labels=1:nrow(CBbiotlrS), col=CBbiotlrShcw7, cex=0.7)
legend(x=5,y=7,legend=1:7, pch=19,col=1:7,cex=0.7)
plot(CBbiotlrSpca$x[,2:3], type="n", main="Log Ratio transformed data")
text(CBbiotlrSpca$x[,2:3], labels=1:nrow(CBbiotlrS), col=CBbiotlrShcw7, cex=0.7)
legend(x=7.5,y=2.5,legend=1:7, pch=19,col=1:7,cex=0.7)
par(mfrow=c(1,2),pty="s")
plot(CBbiotisoSpca$x[,1:2], type="n", main="Log Ratio transformed data",asp=1)
text(CBbiotisoSpca$x[,1:2], labels=1:nrow(CBbiotisoS), col=CBbiotisoShcw7, cex=0.5)
legend(x=6,y=4,legend=1:7, pch=19,col=1:7,cex=0.7)
plot(CBbiotisoSpca$x[,2:3], type="n", main="Log Ratio transformed data",asp=1)
text(CBbiotisoSpca$x[,2:3], labels=1:nrow(CBbiotisoS), col=CBbiotisoShcw7, cex=0.5)
legend(x=4,y=2,legend=1:7, pch=19,col=1:7,cex=0.7)
plot(CBbiotisoSpca$x[,1:2], type="n", main="Log Ratio transformed data")
text(CBbiotisoSpca$x[,1:2], labels=1:nrow(CBbiotisoS), col=CBbiotisoShcw7, cex=0.7)
legend(x=6,y=4,legend=1:7, pch=19,col=1:7,cex=0.7)
plot(CBbiotisoSpca$x[,2:3], type="n", main="Log Ratio transformed data")
text(CBbiotisoSpca$x[,2:3], labels=1:nrow(CBbiotisoS), col=CBbiotisoShcw7, cex=0.7)
legend(x=4,y=2,legend=1:7, pch=19,col=1:7,cex=0.7)
Weird elements:
CBbiotlrS[c(251,253),]
## ID SiO2 TiO2 Al2O3 FeO MnO MgO CaO
## CEMENT.V2.A08 CEMENT.V2.A08 3.166 1.181 3.544 3.073 1.089 2.677 -0.4614
## CEMENT.V2.C06 CEMENT.V2.C06 3.100 1.555 3.429 3.148 1.342 2.700 -0.4693
## Na2O K2O
## CEMENT.V2.A08 -8.457 1.889
## CEMENT.V2.C06 -8.487 1.546
We decide to keep the classifications based on the log-ratio and isometric transformations