For the library andrews check this link to download the achived package: https://cran.r-project.org/src/contrib/Archive/andrews/

# install.packages('HSAUR3')
# install.packages('MVA')
# install.packages('scatterplot3d')
# install.packages("KernSmooth")
# install.packages("andrews")
# install.packages('KernSmooth')
library(HSAUR3)
## Loading required package: tools
data("USairpollution", package = "HSAUR3")

Checking the structure of the dataset and first few records of the dataset

str(USairpollution)
## 'data.frame':    41 obs. of  7 variables:
##  $ SO2    : int  46 11 24 47 11 31 110 23 65 26 ...
##  $ temp   : num  47.6 56.8 61.5 55 47.1 55.2 50.6 54 49.7 51.5 ...
##  $ manu   : int  44 46 368 625 391 35 3344 462 1007 266 ...
##  $ popul  : int  116 244 497 905 463 71 3369 453 751 540 ...
##  $ wind   : num  8.8 8.9 9.1 9.6 12.4 6.5 10.4 7.1 10.9 8.6 ...
##  $ precip : num  33.36 7.77 48.34 41.31 36.11 ...
##  $ predays: int  135 58 115 111 166 148 122 132 155 134 ...
head(USairpollution, 5)
##             SO2 temp manu popul wind precip predays
## Albany       46 47.6   44   116  8.8  33.36     135
## Albuquerque  11 56.8   46   244  8.9   7.77      58
## Atlanta      24 61.5  368   497  9.1  48.34     115
## Baltimore    47 55.0  625   905  9.6  41.31     111
## Buffalo      11 47.1  391   463 12.4  36.11     166
mlab <- "Manufacturing enterprises with 20 or more workers"
plab <- "Population size (1970 census) in thousands"
plot(popul ~ manu, data = USairpollution, xlab = mlab, ylab = plab, main="Population size vs Manufacturing Enterprises")

Alternatively, we could put markers for each point(circle) on the plot

plot(popul ~ manu, data = USairpollution, xlab = mlab, ylab = plab, main="Population size vs Manufacturing Enterprises")
rug(USairpollution$manu, side = 1)
rug(USairpollution$popul, side = 2)

Alternatively, we could represent point(circles) of the scatterplot with text.

layout(matrix(c(2, 0, 1, 3), nrow = 2, byrow = TRUE), widths = c(2, 1), heights = c(1, 2), respect = TRUE)
xlim <- with(USairpollution, range(manu)) * 1.1
plot(popul ~ manu, data = USairpollution, cex.lab = 0.9, xlab = mlab, ylab = plab,  main="Population size vs Manufacturing Enterprises", type = "n", xlim = xlim)
with(USairpollution, text(manu, popul, cex = 0.6, labels = abbreviate(row.names(USairpollution))))

with(USairpollution, hist(manu, xlab=mlab, main = "histogram of maufacturing enteprise", xlim = xlim))

with(USairpollution, boxplot(xlab=plab, popul, main="Vertical boxplot of 1970 population size"))

outcity <- match(lab <- c("Chicago", "Detroit", "Cleveland", "Philadelphia"), rownames(USairpollution))
x <- USairpollution[, c("manu", "popul")]
library("MVA")
## Loading required package: HSAUR2
## Registered S3 methods overwritten by 'HSAUR2':
##   method         from  
##   toLatex.tabtab HSAUR3
##   toLatex.dftab  HSAUR3
## 
## Attaching package: 'HSAUR2'
## The following objects are masked from 'package:HSAUR3':
## 
##     CHFLS, HSAURtable
bvbox(x, mtitle = "", xlab = mlab, ylab = plab, , cex.main=0.8, main="Bivariate (Boxplot in two dimensions) plots of population size and manufacturing countries")
text(x$manu[outcity], x$popul[outcity], labels = lab, cex = 0.7, pos = c(2, 2, 4, 2, 2))

Computing the subsets of points which lie on the convex hull of Usairpollution\(manu** and **Usairpollution\)popul and making a plot with this.

(hull <- with(USairpollution, chull(manu, popul)))
## [1]  9 15 41  6  2 18 16 14  7
with(USairpollution,plot(manu, popul, pch = 1, xlab = mlab, ylab = plab))
with(USairpollution,polygon(manu[hull], popul[hull], density = 15, angle = 30))

with(USairpollution, cor(manu[-hull],popul[-hull]))
## [1] 0.9225267
par(mfrow=c(1,1))
with(USairpollution, plot(manu, popul, xlab = mlab, main="Population size vs Manufacturing Enterprises", ylab = plab, cex.lab = 0.9))

with(USairpollution, chiplot(manu, popul, main="chi-square plot of independence"))

ylim <- with(USairpollution, range(wind)) * c(0.95, 1)
plot(wind ~ temp, data = USairpollution, xlab = "Average annual temperature (Fahrenheit)", ylab = "Average annual wind speed (m.p.h.)", main = "wind speed vs temperature with SO2 concentration", pch = 10, ylim = ylim)
with(USairpollution, symbols(temp, wind, circles = SO2, inches = 0.5, add = TRUE))

plot(wind ~ temp, data = USairpollution, xlab = "Average annual temperature (Fahrenheit)", ylab = "Average annual wind speed (m.p.h.)", main="windpeed vs temperature by location", pch = 10, ylim = ylim)
with(USairpollution, stars(USairpollution[,-c(2,5)], locations = cbind(temp, wind),labels = NULL, add = TRUE, cex = 0.5))

stars(USairpollution, cex = 0.55)

We can also look at the pair plot of variables of USairpoluution dataset with one another.

pairs(USairpollution, main="The pairs plot for variable pairs of USairpollution are shown below", pch = ".", cex = 1.5)

paste("The correlation matrix of variables of the data are")
## [1] "The correlation matrix of variables of the data are"
round(cor(USairpollution), 4)
##             SO2    temp    manu   popul    wind  precip predays
## SO2      1.0000 -0.4336  0.6448  0.4938  0.0947  0.0543  0.3696
## temp    -0.4336  1.0000 -0.1900 -0.0627 -0.3497  0.3863 -0.4302
## manu     0.6448 -0.1900  1.0000  0.9553  0.2379 -0.0324  0.1318
## popul    0.4938 -0.0627  0.9553  1.0000  0.2126 -0.0261  0.0421
## wind     0.0947 -0.3497  0.2379  0.2126  1.0000 -0.0130  0.1641
## precip   0.0543  0.3863 -0.0324 -0.0261 -0.0130  1.0000  0.4961
## predays  0.3696 -0.4302  0.1318  0.0421  0.1641  0.4961  1.0000
pairs(USairpollution, panel = function (x, y, ...) {
                                  points(x, y, ...)
                                  abline(lm(y ~ x), col = "grey")
                              }, main="The pairs plot with fitted line for variable pairs of USairpollution are shown below", cex.main=0.8, pch = ".", cex = 1.5)

epa <- function(x, y) ((x^2 + y^2) < 1) * 2/pi * (1 - x^2 - y^2)
x <- seq(from = -1.1, to = 1.1, by = 0.05)
epavals <- sapply(x, function(a) epa(a, x))
persp(x = x, y = x, z = epavals, xlab = "x", ylab = "y", zlab = expression(K(x, y)), theta = -35, axes = TRUE, box = TRUE)

library("scatterplot3d")
with(USairpollution, scatterplot3d(temp, wind, SO2, type = "h", angle = 255, main="3D scatterplot of temperature, windspeed and SO2"))

Next we consider the CYGOB1 dataset of the HSAUR3 package

str(CYGOB1)
## 'data.frame':    47 obs. of  2 variables:
##  $ logst: num  4.37 4.56 4.26 4.56 4.3 4.46 3.84 4.57 4.26 4.37 ...
##  $ logli: num  5.23 5.74 4.93 5.74 5.19 5.46 4.65 5.27 5.57 5.12 ...

looking at the first few records of the dataset

head(CYGOB1, 5)
##      logst logli
## [1,]  4.37  5.23
## [2,]  4.56  5.74
## [3,]  4.26  4.93
## [4,]  4.56  5.74
## [5,]  4.30  5.19

Now we consider some kernel smoothing functions for the above dataset

library("KernSmooth")
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
plot(density(CYGOB1[,1],bw="ucv")) #bw="SJ"
## Warning in bw.ucv(x): minimum occurred at one end of the range

plot(density(CYGOB1[,2]))

CYGOB1d <- bkde2D(CYGOB1, bandwidth = sapply(CYGOB1, dpik))
plot(CYGOB1, xlab = "log surface temperature", ylab = "log light intensity")
contour(x = CYGOB1d$x1, y = CYGOB1d$x2, z = CYGOB1d$fhat, add = TRUE,nlevels=5)

persp(theta= 90,phi = 85,x = CYGOB1d$x1, y = CYGOB1d$x2, z = CYGOB1d$fhat, xlab = "log surface temperature", ylab = "log light intensity", zlab = "density")

The following visualization below include density plot, bivariate plot made using the xyplot function of lattice package, and a 3d scatterplot using the cloud function of the lattice package respectively.

library(lattice)
attach(USairpollution)
## The following object is masked from package:datasets:
## 
##     precip
plot(density(wind))

plot(xyplot(SO2 ~ temp| cut(wind, 2), data = USairpollution))

pollution <- with(USairpollution, equal.count(SO2,4))
plot(cloud(precip ~ temp * wind | pollution, panel.aspect = 0.9,data = USairpollution))

Next we consider the quakes dataset of the datasets package

str(quakes)
## 'data.frame':    1000 obs. of  5 variables:
##  $ lat     : num  -20.4 -20.6 -26 -18 -20.4 ...
##  $ long    : num  182 181 184 182 182 ...
##  $ depth   : int  562 650 42 626 649 195 82 194 211 622 ...
##  $ mag     : num  4.8 4.2 5.4 4.1 4 4 4.8 4.4 4.7 4.3 ...
##  $ stations: int  41 15 43 19 11 12 43 15 35 19 ...

looking at the first few records of the dataset

head(quakes, 5)
##      lat   long depth mag stations
## 1 -20.42 181.62   562 4.8       41
## 2 -20.62 181.03   650 4.2       15
## 3 -26.00 184.10    42 5.4       43
## 4 -17.97 181.66   626 4.1       19
## 5 -20.42 181.96   649 4.0       11

The following visualization below include density plot, bivariate plot made using the xyplot function of lattice package, and a 3d scatterplot using the cloud function of the lattice package respectively.

attach(quakes)
plot(density(depth))

plot(xyplot(lat ~ long| cut(depth, 3), data = quakes, layout = c(3, 1), xlab = "Longitude", ylab = "Latitude"))

plot(cloud(depth ~ lat * long | mag, data = quakes, zlim = rev(range(quakes$depth)), screen = list(z = 105, x = -70), panel.aspect = 0.9, xlab = "Longitude", ylab = "Latitude", zlab = "Depth"))

For the library andrews check this link to download the achived package: https://cran.r-project.org/src/contrib/Archive/andrews/

Using all datasets read in from above, we apply the andrews function (a function for making curves for multidimensional data visualization) from the andrews package

library("andrews")
par(mfcol=c(1,1))
andrews(CYGOB1,ymax=1.5)

andrews(quakes,ymax=3)

andrews(USairpollution,ymax=3)

id.pollution = rep(0,dim(USairpollution)[1])
id.pollution[outcity] = rep(2,length(outcity))
cbind(USairpollution,id.pollution)->USairP
attach(USairP)
## The following object is masked _by_ .GlobalEnv:
## 
##     id.pollution
## The following objects are masked from USairpollution:
## 
##     manu, popul, precip, predays, SO2, temp, wind
## The following object is masked from package:datasets:
## 
##     precip
andrews(USairP,type=4,clr=8,ymax=1.5)

stalac function of the MVA package used to detect and identify multivariate outliers and this was applied to the USairpollution data sets.

stalac(USairpollution)

andrews function applied once again to the iris data of the datasets package

data(iris)
andrews(iris,clr=5,ymax=3)

andrews(iris,type=4,clr=5,ymax=2)