# Applied Predictive Modeling Chapter 3: Exercise 3.1
#
# This web page provides answers to Exercise 3.1 of Applied Predictive Modeling.
# That exercise says:
#
# "The UC Irvine Machine Learning Repository contains a data set related
# to glass identification. The data consists of 214 glass samples labled
# as one of seven glass categories. There are nine predictors, including
# refractive index and percentages of eight elements: Na, Mg, Al, Si, K, Ca,
# Ba, and Fe."
#
#
# EXERCISE 3.1(a) DATA VISUALISATIONS
# Using visualizations, explore the predictor variables to
# understand their distributions as well as the relationships between
# predictors.
#
# To address 3.1(a), I load the some programs and data and then create graphics.
#
# Step 1: I load the libraries whose programs are used in the analysis
suppressMessages(library(mlbench)) # contains data sets used in the analysis
suppressMessages(library(caret)) # APM pre-procoessing program and others
suppressMessages(library(dplyr)) # used for some data manipulation
suppressMessages(library(e1071)) # for skewness computation
suppressMessages(library(tidyr)) # reshape a correlation matrix for use in heat map
suppressMessages(library(knitr)) # used to create output for web
suppressMessages(library(GGally)) # used for ggpairs correlation graph graph
#
# Step2: Load the GLASS dataset
#
data(Glass)
#
##
#
# Step 3: Create a histogram of the Y variable (i.e., Type of Glass)
#
ggplot(Glass,aes(x=Type))+
geom_histogram(fill="light blue",colour="black")+
ggtitle("Number of Observations by Type of Glass")

#
# The above analysis informs me that most of the glass Types are either Type 1 or Type 2.
#
# Step 4(a): I explore the X variables by regarding distributions and outliers,
# first by using box plots and then by using histograms. The following analysis
# loops through each X variable using "plot," which defaults in this case to a box plot.
#
# Note: The boxplot analysis also addresses 3.1(b) (below) because the box & whiskers
# shows outliers.
#
for(i in 1:9){
readline(prompt="Hit return to see next plot>> ")
plot(Glass$Type,Glass[,i],col="light blue",
xlab="Type of Glass",ylab=names(Glass)[i],main=paste("Series Chart",i,": Box Plot of Chemical Elements",
names(Glass)[i],"by Type of Glass"))
}
## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

#
# Step 4(b): Here is the same exercise using ggplot with geom_histogram. I include
# some accompanying notes that describe how I modified the usual (interactive) ggplot
# commands due to the fact that ggplot in this example is inside a function (a for loop):
# Modification 1. I used aes_string instead of aes when aes is inside a for loop.
# Modification 2. I embedded the ggplot function inside of a print() function. This is
# necessary to get ggplot to actually plot when it is inside another function (such as the
# for loop).
#
elem<-names(Glass)
for(i in 1:9){
readline(prompt="Hit return to see next plot>> ")
print(ggplot(Glass,aes_string(x=elem[10],y=elem[i]))+
geom_boxplot(fill="light blue")+
ggtitle(paste("Series Chart",i,": BoxPlot of Trace Chemical Element",elem[i],"\n by Type of Glass"))+
xlab("Type of Glass")) # in a for loop you must explicitly print ggplot
}
## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

#
# Here, I maintain the work space:
rm(elem,i)
#
# Step 5: In the next exploratory exercise, I compute mean and t.stat of each X by Y. I created
# files for each component (mean, var, n). There may be a way to improve the
# efficiency of this analysis. I also look at the variability of each of the X variables.
#
stat.bytype<-Glass%>%
group_by(Type)%>%
summarise_each(funs(n=n(),mean=mean,sd=sd,skew=skewness))%>%
select(1:2,11:37)
#
# Notice that summarise_each applies sapply(summarise). Must preface include funs(...).
#
# Step 6: I also want to see whether the number of non-zeros varies by Type for each of the chemical elements.
# I compute the percentage of records (by Glass Type) that have non-zero amounts by chemical element.
#
nz1<-data.frame(sapply(Glass[,1:9],function(x)ifelse(x!=0,1,0)))
nz1$Type<-Glass[,10]
nonzeros.bytype<-nz1%>%
group_by(Type)%>%
summarise_each(funs(mean))
nonzeros.bytype
## Source: local data frame [6 x 10]
##
## Type RI Na Mg Al Si K Ca Ba
## (fctr) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl)
## 1 1 1 1 1.0000000 1 1 0.9857143 1 0.04285714
## 2 2 1 1 0.8815789 1 1 0.9605263 1 0.07894737
## 3 3 1 1 1.0000000 1 1 0.9411765 1 0.05882353
## 4 5 1 1 0.4615385 1 1 1.0000000 1 0.15384615
## 5 6 1 1 0.6666667 1 1 0.0000000 1 0.00000000
## 6 7 1 1 0.2068966 1 1 0.4482759 1 0.89655172
## Variables not shown: Fe (dbl)
# There must be a more elegant way!
# Some tentative conclusions:
# If no Mg, maybe Type 7, since only 21% of Type 7 Glass had Mg.
# If no K, maybe type 6 or 7, since 0% of Type 6 and 45% of Type 7 had non-zeros for K.
# If no Ba, maybe NOT type 7, since 90% of Type 7 Glass had Ba.
# If no Fe, maybe Type 6, since NOT type 2, since 42% of Type 2 had Fe.
# Then, you can think of combinations:
# If you have no Mg and no Fe, but you have some Ba, then you may have Type 7.
#
rm(nz1)
#
# Plot the results of the non-zeros analysis.
nz2<-gather(nonzeros.bytype,elem,value,-Type)
#
ggplot(nz2,aes(x=Type,y=value,fill=elem,colour=elem))+
geom_point(size=8,position=position_jitter(width=0.4, height=.05))+
labs(xlab("Type of Glass"))+
labs(ylab("Proportion of Observations with Non-Zero Values"))+
ggtitle("Percent of Non-Zero Values of Each Element \n (By Type of Glass)")+
geom_hline(yintercept=.25,colour="dark grey",size=1)+
geom_hline(yintercept=.75,colour="dark grey",size=1)

# The darker grey lineS shows visually aids the identification of those elements (by Type) for which
# fewer than 25% of the data is non-zero and for which more than 75% of the data are non-zero.
#
# The chart shows that if you have little or no Ba, you might be anything but Type 7.
#
# No Mg: maybe Type 7.
# No Mg and No Fe: maybe Type 7.
# Mg but no Ba: probably not Type 7.
#
rm(nz2,nonzeros.bytype)
#
# Step 6(b). I want to see if the variables with numerous zeros have "near zero variance." If so, then they will be
# unlikely to help classify/predict the type of glass. I use the filtering functions explained on p. 55.
nearZeroVar(Glass)
## integer(0)
#
# In this case, none of the variables should be removed due to insufficient variability.
#
#
# Step 7(a): In the next exploratory anaysis, I examine graphically the correlations between
# explanatory variables.
# I do this graphically using the pairs() function, which is part of the {base} package.
pairs(Glass[,1:9],col="black",lower.panel=panel.smooth,pch=20)

#
# Step 7(b): The results are not very appealing, so I do the same exercise using ggpairs, which is part
# of the GGally package. The GGally package provides some canned programs that call on ggplot2.
#
ggpairs(Glass,columns=1:9,
title="XY Plot of Trace Elements",
upper=list(params=list(size=4)))+
theme_grey(base_size=8)

#
# You can see from the above plot that Mg has a lot of zeros, as do Fe and Ba.
# It's not clear whether centering/scaling is appropriate given the truncation of these X variables.
#
# Step 7:(c) I do the same correlation exercise using another function from GGally called ggscatmat()
# ggscatmat() shows the pairwise scatter plots of Xs in lower triangle,
# a density plot of the individual Xs on diagonal (i.e. a smoothed histogram
# of one of the chemical elements) and it reports the pairwise correlation coefficients
# in upper triangle. (At this point, I do not know how to adjust the font size
# of the correlation coefficients in the upper triangle.)
#
ggscatmat(Glass,columns=1:9,color=NULL)+
theme_grey(base_size=10)+
geom_point(colour="lightseagreen",na.rm=TRUE)+ #na.rm=remove records for which the data are missing
labs(title="XY Plot of Trace Elements \n (with Density and Correlation)")+
theme(plot.title = element_text(size = rel(1.5)))+
labs(xlab(" "))+
labs(ylab(" "))

#
# It is not clear to me why I had to put the xlab() and ylab() functions on separate lines.
#
# Step 7(d): I compute the Correlation matrix of X variables & associated t-statistics
# Here is the matrix of correlation coefficients:
#
glass.cor<-cor(Glass[,1:9])
glass.cordf<-data.frame(glass.cor) # converts corr matrix to a df
#
# This computes the correlation coefficient t-stat:
glass.cort<-glass.cor/sqrt((1-glass.cor^2)/(nrow(Glass)-2))
#
# To improve readability, I hide the upper triangles and print the correlation coefficients and their
# related t-statistics.
index<-upper.tri(glass.cor)
cor.g<-round(glass.cor,4)
cor.g[index]<-""
cor.g<-data.frame(cor.g)
cor.gt<-round(glass.cort,4)
cor.gt[index]<-""
cor.gt<-data.frame(cor.gt)
cor.g
## RI Na Mg Al Si K Ca Ba Fe
## RI 1
## Na -0.1919 1
## Mg -0.1223 -0.2737 1
## Al -0.4073 0.1568 -0.4818 1
## Si -0.5421 -0.0698 -0.1659 -0.0055 1
## K -0.2898 -0.2661 0.0054 0.326 -0.1933 1
## Ca 0.8104 -0.2754 -0.4438 -0.2596 -0.2087 -0.3178 1
## Ba -4e-04 0.3266 -0.4923 0.4794 -0.1022 -0.0426 -0.1128 1
## Fe 0.143 -0.2413 0.0831 -0.0744 -0.0942 -0.0077 0.125 -0.0587 1
cor.gt
## RI Na Mg Al Si K Ca Ba Fe
## RI Inf
## Na -2.8468 Inf
## Mg -1.7938 -4.1439 Inf
## Al -6.4939 2.3115 -8.0055 Inf
## Si -9.3919 -1.0189 -2.4499 -0.0804 Inf
## K -4.4093 -4.0192 0.0786 5.0202 -2.8691 Inf
## Ca 20.1403 -4.1719 -7.2098 -3.9139 -3.1076 -4.8809 Inf
## Ba -0.0056 5.0313 -8.2342 7.9538 -1.4952 -0.6211 -1.6536 Inf
## Fe 2.1039 -3.6211 1.2136 -1.0863 -1.3777 -0.1124 1.8339 -0.856 Inf
#
# Remove temp datasets
rm(glass.cor,glass.cort,index)
#
# Step 7(e): Here is yet another visualization of correlation: a Heatmap using ggplot.
# This is an attempt to replicate Figure 3.10 in the book.
#
# To create the heatmap, I started with the matrix of correlation coefficients.
# THen, I used the gather() function of the tidyr package to convert the
# correlation table into a format that ggplot could use. The new
# table has a row for each observation and a column for each variable. This
# is sometimes called the "long" format.
# Finally, I plotted the heatmap.
#
glass.cor2<-gather(glass.cordf) # This creates the long format.
glass.cor2$names<- names(glass.cordf) # I add a names column.
glass.cor2$names<-factor(glass.cor2$names,
levels=rev(names(glass.cordf))) #this controls *order* of y-axis in plot
ggplot(glass.cor2,aes(x=key,y=names))+
geom_tile(aes(fill = value),colour = "white")+
scale_fill_gradient2(limits=c(-1,1),low = "red",high = "darkblue")+
labs(list(title="Heatmap of Correlations of Independent Variables",x="Trace Element",y="Trace Element"))+
theme(plot.title = element_text(size = rel(1.5)))

#
# The heatmap shows that RI and Ca are positively correlated (0.81) and that RI and Si are negatively correlated,
# (-0.54) as is Ba and Mg (-0.49) and Al and Mg (-.048).
# Remove unneeded data
rm(glass.cordf,glass.cor2)
#
# By using the ggplot option called scale_fill_gradient2 instead of
# scale_fill_gradient, I ensured that correlations around 0.00 were white.
# In other words, the option centers whiteness at 0.00.
#
#
# Step 8: Here are the scatterplots of the five pairs whose correlation coefficients
# are at least abs(0.48). (Separate colors by Type of Glass):
#
x.var<-c("RI","RI","Mg","Mg","Al")
y.var<-c("Si","Ca","Al","Ba","Ba")
by.var<-"Type"
for(i in 1:length(x.var)){
x2<-x.var[i]
y2<-y.var[i]
readline(prompt="Hit return to see next plot>> ")
x.pos<-diff(range(Glass[,x2]))*.25+min(Glass[,x2])
y.pos<-diff(range(Glass[,y2]))*.85+min(Glass[,y2])
coef<-coef(lm(Glass[,y2]~Glass[,x2]))
print(ggplot(Glass,aes_string(x=x2,y=y2,colour=by.var,fill=by.var))+
geom_point()+
labs(title=paste("Series Chart",i,": Scatterplot of",x2,"and",y2,"\n (Color by",by.var,"of Glass)"))+
geom_abline(intercept=coef[1],slope=coef[2],colour="darkgrey")+
annotate("text",x=x.pos,y=y.pos,label=paste("Correlation=",
round(cor(Glass[x2],Glass[y2]),3)))
)
}
## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

rm(by.var,coef,x.pos,x.var,y.pos,y.var,x2,y2,i)
#
# The graphs show that some of the correlations are heavily influenced by the large numbers of
# zero values in one or the other (or both) of the variables.
#
# EXERCISE 3.1(B): OUTLIERS ANALYSIS
# NOTE: SEE ABOVE FOR BOX PLOT, WHICH SHOWS OUTLIERS
# ANOTHER ANALYSIS TO IDENTIFY OUTLIERS:
#
#
# In this exercise, I plot the histograms of all of the Xs to visualize skewness
# I then compute and display skewness on the histograms. This is the
# same info that is on the diagonal of the XY plots.
# (Note: No skew=0. -1<=skew<=1 is prob ok. skew<-1 or >1 is highly
# skewed.)
#
# The annotation in the chart presents the skewness coefficient. The ifelse
# statement in the code places the annotation to the right if positively
# skewed and to the left if negatively skewed. I have not figured out how to vary
# the vertical distance, which is hard-wired at a count of 25.
#
skew.g<-sapply(Glass[,1:9],function(x){round(skewness(x),4)})
elem<-names(Glass)
for(i in 1:9){
readline(prompt="Hit return to see next plot>> ")
quad.pos<-diff(range(Glass[,i]))*.75+min(Glass[,i])
quad.neg<-diff(range(Glass[,i]))*.25+min(Glass[,i])
x.g<-ifelse(skew.g[i]>=0,quad.pos,quad.neg) # use midpoint to compute offset for annotation
binw.g<-diff(range(Glass[,i]))/20 # compute bin width
suppressWarnings(print(
ggplot(Glass,aes_string(x=elem[i],fill="Type"))+
geom_histogram(stat="bin",binwidth=binw.g)+
ggtitle(paste("Series Chart",i,": Histogram of Trace Chemical",elem[i],"\n(Color by Type of Glass)"))+
annotate("text",label=paste("Skewness=",skew.g[i]),x=x.g,y=25)
)
)
}
## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

rm(quad.neg,quad.pos,skew.g,x.g,elem,binw.g,i)
#
# Warning messages in the above have been suppressed. To see the justification, see:
# http://stackoverflow.com/questions/14476961/why-do-i-get-position-dodge-requires-constant-width-even-though-widths-are-con
#
#
# Instead of creating histograms that stack by Type of Glass, we can facet wrap, and display each chart
# separately. In this case, it is easier to see how the counts of the trace chemical elements differ by type
# of glass.
#
skew.g<-sapply(Glass[,1:9],function(x){round(skewness(x),3)})
elem<-names(Glass)
for(i in 1:9){
readline(prompt="Hit return to see next plot>> ")
quad.pos<-diff(range(Glass[,i]))*.75+min(Glass[,i])
quad.neg<-diff(range(Glass[,i]))*.25+min(Glass[,i])
x.g<-ifelse(skew.g[i]>=0,quad.pos,quad.neg) # use midpoint to compute offset for annotation
binw.g<-diff(range(Glass[,i]))/20 # compute bin width
suppressWarnings(print(
ggplot(Glass,aes_string(x=elem[i]))+
geom_histogram(data=Glass,stat="bin",
binwidth=binw.g,colour="black",fill="light blue")+
facet_wrap(~Type)+
ggtitle(paste("Series Chart",i,": Histogram of Trace Element",elem[i],"\n (by Type of Glass)"))+
theme(strip.text.x=element_text(color="blue",face="bold"))
))
}
## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

rm(binw.g,elem,i,quad.neg,quad.pos,x.g,skew.g)
#
# The faceted graphs really highlight how certain trace elements are not found (i.e., many observations
# of zero) for certain types of glass. For example:
# Chart 7 shows Mg has numerous zeros for Type 7.
# Chart 8 shows Ba has numerous zeros possibly for Type 7.
# Chart 9 shows Fe has non zeros indicating possibly 1 or 2 (though Fe has zeros for all types)
#
# EXERCISE 3.1(C): POSSIBLE TRANSORMATIONS
# Step 1: Since most of the Xs are highly skewed, I want to center/scale/de-skew.
# I used caret's preProcess function to make the transformations. I performed this on all variables, even
# though the variables with numerous zero values are not good candidates for centering (the logic of which
# escapes me).
#
# BoxCox determines the transformation that would best normalize the
# distribution. Centering and scaling transform the variable to have
# a mean zero and standard deviation of 1.0.
#
trans<-preProcess(Glass[,1:9],method=c("BoxCox","center","scale"))
# Trans is an object that has the resuls of the computations: it must be
# applied to the raw data to create transformed data.
# Apply the above transformation equation to the raw data
glass.trans<-predict(trans,Glass[,1:9])
glass.trans$Type<-Glass[,10]
#
# Step 2: Let's look at the histograms of the transformed data
#
skew.g<-sapply(glass.trans[,1:9],function(x){round(skewness(x),4)})
elem<-names(Glass)
for(i in 1:9){
readline(prompt="Hit return to see next plot>> ")
quad.pos<-diff(range(glass.trans[,i]))*.75+min(glass.trans[,i])
quad.neg<-diff(range(glass.trans[,i]))*.25+min(glass.trans[,i])
x.g<-ifelse(skew.g[i]>=0,quad.pos,quad.neg) # use midpoint to compute offset for annotation
binw.g<-diff(range(glass.trans[,i]))/20 # compute bin width
suppressWarnings(print(
ggplot(glass.trans,aes_string(x=elem[i],fill="Type"))+
geom_histogram(stat="bin",binwidth=binw.g)+
ggtitle(paste("Series Chart",i,": Histogram of Transformed Trace Chemical Element",elem[i],"\n(Color by Type of Glass)"))+
annotate("text",label=paste("Skewness=",skew.g[i]),x=x.g,y=25)
))
}
## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

rm(skew.g,elem,i,quad.pos,quad.neg,x.g,binw.g)
#
# The transformation results for Ba and Fe (Charts )
# Step 3: Instead of stacking by type of glass, we can look at the transformed data by Type in individual
# windows (or "facets").
#
skew.g<-sapply(glass.trans[,1:9],function(x){round(skewness(x),3)})
elem<-names(Glass)
for(i in 1:9){
readline(prompt="Hit return to see next plot>> ")
quad.pos<-diff(range(glass.trans[,i]))*.75+min(glass.trans[,i])
quad.neg<-diff(range(glass.trans[,i]))*.25+min(glass.trans[,i])
x.g<-ifelse(skew.g[i]>=0,quad.pos,quad.neg) # use midpoint to compute offset for annotation
binw.g<-diff(range(glass.trans[,i]))/20 # compute bin width
suppressWarnings(print(
ggplot(glass.trans,aes_string(x=elem[i]))+
geom_histogram(data=glass.trans,stat="bin",
binwidth=binw.g,colour="black",fill="light yellow")+
facet_wrap(~Type)+
ggtitle(paste("Series Chart",i,": Histogram of Transformed Trace Chemical Element",elem[i],"\n (by Type of Glass)"))
))
}
## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

rm(i,skew.g,quad.pos,quad.neg,x.g,binw.g)
#
# For example, Chart 3 (Mg) shows the transformed distribution has a cluster of zeros near -2 for Type 7.
# This has less intuition than an untransformed cluster at zero.
#
# Step 4: Here's a comparison of the untransformed and transformed data by Type. Other than
# centering the data at a mean of zero, there does not appear to be an obvious normalization of the
# variables.
#
for(i in 1:9){
dat<-Glass%>%
select(i,10)
dat$trans<-glass.trans[,i]
dat<-gather(dat,elem,value,-Type) #this is from tidyr. It "melts" the data frame so that it can be more easily subsetted.
readline(prompt="Hit return to see next plot>> ")
binw.g<-diff(range(dat[,3]))/20 # compute bin width
suppressWarnings(print(
ggplot(dat,aes(x=value))+
geom_histogram(data=subset(dat,elem==names(Glass)[i]),
binwidth=binw.g,colour="black",fill="blue",alpha=.2)+
geom_histogram(data=subset(dat,elem=="trans"),
binwidth=binw.g,colour="black",fill="yellow",alpha=.3)+
facet_wrap(~Type)+
ggtitle(paste("Histogram of Actual (Blue) and Transformed (Yellow) Trace Chemical Element",
elem[i],"\n (by Type of Glass)"))
))
}
## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

# Step 5: And here is a comparison of the untransformed and transformed data overall.
# To do this, I removed the line "facet_wrap(~Type)" from the above code.
#
for(i in 1:9){
dat<-Glass%>%
select(i,10)
dat$trans<-glass.trans[,i]
dat<-gather(dat,elem,value,-Type) #this is from tidyr. It "melts" the data frame so that it can be more easily subsetted.
readline(prompt="Hit return to see next plot>> ")
binw.g<-diff(range(dat[,3]))/50 # compute bin width
suppressWarnings(print(
ggplot(dat,aes(x=value))+
geom_histogram(data=subset(dat,elem==names(Glass)[i]),
binwidth=binw.g,colour="black",fill="blue",alpha=.2)+
geom_histogram(data=subset(dat,elem=="trans"),
binwidth=binw.g,colour="black",fill="yellow",alpha=.3)+
ggtitle(paste("Series Chart",i,": Histogram of Actual (Blue) and Transformed (Yellow) Trace Chemical Element",
elem[i]))
))
}
## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

## Hit return to see next plot>>

# It seems that the following are meaningful candidates for centering/scaling:
# Series Chart 2 (Na)
# Chart 4 (Al)
# Chart 5 (Si)
# Chart 7 (Ca)
rm(binw.g,elem,i,dat)
rm(trans,cor.gt,cor.g)
#
#
# EXERCISE 3-1(C) Other relevant transformations of one or more predictors that
# might improve the classification model.
#
# (In process)