For the library andrews check this link to download the achived package: https://cran.r-project.org/src/contrib/Archive/andrews/
# install.packages('HSAUR3')
# install.packages('MVA')
# install.packages('scatterplot3d')
# install.packages("KernSmooth")
# install.packages("andrews")
# install.packages('KernSmooth')
library(HSAUR3)
## Loading required package: tools
data("USairpollution", package = "HSAUR3")
Checking the structure of the dataset and first few records of the dataset
str(USairpollution)
## 'data.frame': 41 obs. of 7 variables:
## $ SO2 : int 46 11 24 47 11 31 110 23 65 26 ...
## $ temp : num 47.6 56.8 61.5 55 47.1 55.2 50.6 54 49.7 51.5 ...
## $ manu : int 44 46 368 625 391 35 3344 462 1007 266 ...
## $ popul : int 116 244 497 905 463 71 3369 453 751 540 ...
## $ wind : num 8.8 8.9 9.1 9.6 12.4 6.5 10.4 7.1 10.9 8.6 ...
## $ precip : num 33.36 7.77 48.34 41.31 36.11 ...
## $ predays: int 135 58 115 111 166 148 122 132 155 134 ...
head(USairpollution, 5)
## SO2 temp manu popul wind precip predays
## Albany 46 47.6 44 116 8.8 33.36 135
## Albuquerque 11 56.8 46 244 8.9 7.77 58
## Atlanta 24 61.5 368 497 9.1 48.34 115
## Baltimore 47 55.0 625 905 9.6 41.31 111
## Buffalo 11 47.1 391 463 12.4 36.11 166
mlab <- "Manufacturing enterprises with 20 or more workers"
plab <- "Population size (1970 census) in thousands"
plot(popul ~ manu, data = USairpollution, xlab = mlab, ylab = plab, main="Population size vs Manufacturing Enterprises")
Alternatively, we could put markers for each point(circle) on the plot
plot(popul ~ manu, data = USairpollution, xlab = mlab, ylab = plab, main="Population size vs Manufacturing Enterprises")
rug(USairpollution$manu, side = 1)
rug(USairpollution$popul, side = 2)
Alternatively, we could represent point(circles) of the scatterplot with text.
layout(matrix(c(2, 0, 1, 3), nrow = 2, byrow = TRUE), widths = c(2, 1), heights = c(1, 2), respect = TRUE)
xlim <- with(USairpollution, range(manu)) * 1.1
plot(popul ~ manu, data = USairpollution, cex.lab = 0.9, xlab = mlab, ylab = plab, main="Population size vs Manufacturing Enterprises", type = "n", xlim = xlim)
with(USairpollution, text(manu, popul, cex = 0.6, labels = abbreviate(row.names(USairpollution))))
with(USairpollution, hist(manu, xlab=mlab, main = "histogram of maufacturing enteprise", xlim = xlim))
with(USairpollution, boxplot(xlab=plab, popul, main="Vertical boxplot of 1970 population size"))
outcity <- match(lab <- c("Chicago", "Detroit", "Cleveland", "Philadelphia"), rownames(USairpollution))
x <- USairpollution[, c("manu", "popul")]
library("MVA")
## Loading required package: HSAUR2
## Registered S3 methods overwritten by 'HSAUR2':
## method from
## toLatex.tabtab HSAUR3
## toLatex.dftab HSAUR3
##
## Attaching package: 'HSAUR2'
## The following objects are masked from 'package:HSAUR3':
##
## CHFLS, HSAURtable
bvbox(x, mtitle = "", xlab = mlab, ylab = plab, , cex.main=0.8, main="Bivariate (Boxplot in two dimensions) plots of population size and manufacturing countries")
text(x$manu[outcity], x$popul[outcity], labels = lab, cex = 0.7, pos = c(2, 2, 4, 2, 2))
Computing the subsets of points which lie on the convex hull of Usairpollution\(manu** and **Usairpollution\)popul and making a plot with this.
(hull <- with(USairpollution, chull(manu, popul)))
## [1] 9 15 41 6 2 18 16 14 7
with(USairpollution,plot(manu, popul, pch = 1, xlab = mlab, ylab = plab))
with(USairpollution,polygon(manu[hull], popul[hull], density = 15, angle = 30))
with(USairpollution, cor(manu[-hull],popul[-hull]))
## [1] 0.9225267
par(mfrow=c(1,1))
with(USairpollution, plot(manu, popul, xlab = mlab, main="Population size vs Manufacturing Enterprises", ylab = plab, cex.lab = 0.9))
with(USairpollution, chiplot(manu, popul, main="chi-square plot of independence"))
ylim <- with(USairpollution, range(wind)) * c(0.95, 1)
plot(wind ~ temp, data = USairpollution, xlab = "Average annual temperature (Fahrenheit)", ylab = "Average annual wind speed (m.p.h.)", main = "wind speed vs temperature with SO2 concentration", pch = 10, ylim = ylim)
with(USairpollution, symbols(temp, wind, circles = SO2, inches = 0.5, add = TRUE))
plot(wind ~ temp, data = USairpollution, xlab = "Average annual temperature (Fahrenheit)", ylab = "Average annual wind speed (m.p.h.)", main="windpeed vs temperature by location", pch = 10, ylim = ylim)
with(USairpollution, stars(USairpollution[,-c(2,5)], locations = cbind(temp, wind),labels = NULL, add = TRUE, cex = 0.5))
stars(USairpollution, cex = 0.55)
We can also look at the pair plot of variables of USairpoluution dataset with one another.
pairs(USairpollution, main="The pairs plot for variable pairs of USairpollution are shown below", pch = ".", cex = 1.5)
paste("The correlation matrix of variables of the data are")
## [1] "The correlation matrix of variables of the data are"
round(cor(USairpollution), 4)
## SO2 temp manu popul wind precip predays
## SO2 1.0000 -0.4336 0.6448 0.4938 0.0947 0.0543 0.3696
## temp -0.4336 1.0000 -0.1900 -0.0627 -0.3497 0.3863 -0.4302
## manu 0.6448 -0.1900 1.0000 0.9553 0.2379 -0.0324 0.1318
## popul 0.4938 -0.0627 0.9553 1.0000 0.2126 -0.0261 0.0421
## wind 0.0947 -0.3497 0.2379 0.2126 1.0000 -0.0130 0.1641
## precip 0.0543 0.3863 -0.0324 -0.0261 -0.0130 1.0000 0.4961
## predays 0.3696 -0.4302 0.1318 0.0421 0.1641 0.4961 1.0000
pairs(USairpollution, panel = function (x, y, ...) {
points(x, y, ...)
abline(lm(y ~ x), col = "grey")
}, main="The pairs plot with fitted line for variable pairs of USairpollution are shown below", cex.main=0.8, pch = ".", cex = 1.5)
epa <- function(x, y) ((x^2 + y^2) < 1) * 2/pi * (1 - x^2 - y^2)
x <- seq(from = -1.1, to = 1.1, by = 0.05)
epavals <- sapply(x, function(a) epa(a, x))
persp(x = x, y = x, z = epavals, xlab = "x", ylab = "y", zlab = expression(K(x, y)), theta = -35, axes = TRUE, box = TRUE)
library("scatterplot3d")
with(USairpollution, scatterplot3d(temp, wind, SO2, type = "h", angle = 255, main="3D scatterplot of temperature, windspeed and SO2"))
Next we consider the CYGOB1 dataset of the HSAUR3 package
str(CYGOB1)
## 'data.frame': 47 obs. of 2 variables:
## $ logst: num 4.37 4.56 4.26 4.56 4.3 4.46 3.84 4.57 4.26 4.37 ...
## $ logli: num 5.23 5.74 4.93 5.74 5.19 5.46 4.65 5.27 5.57 5.12 ...
looking at the first few records of the dataset
head(CYGOB1, 5)
## logst logli
## [1,] 4.37 5.23
## [2,] 4.56 5.74
## [3,] 4.26 4.93
## [4,] 4.56 5.74
## [5,] 4.30 5.19
Now we consider some kernel smoothing functions for the above dataset
library("KernSmooth")
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
plot(density(CYGOB1[,1],bw="ucv")) #bw="SJ"
## Warning in bw.ucv(x): minimum occurred at one end of the range
plot(density(CYGOB1[,2]))
CYGOB1d <- bkde2D(CYGOB1, bandwidth = sapply(CYGOB1, dpik))
plot(CYGOB1, xlab = "log surface temperature", ylab = "log light intensity")
contour(x = CYGOB1d$x1, y = CYGOB1d$x2, z = CYGOB1d$fhat, add = TRUE,nlevels=5)
persp(theta= 90,phi = 85,x = CYGOB1d$x1, y = CYGOB1d$x2, z = CYGOB1d$fhat, xlab = "log surface temperature", ylab = "log light intensity", zlab = "density")
The following visualization below include density plot, bivariate plot made using the xyplot function of lattice package, and a 3d scatterplot using the cloud function of the lattice package respectively.
library(lattice)
attach(USairpollution)
## The following object is masked from package:datasets:
##
## precip
plot(density(wind))
plot(xyplot(SO2 ~ temp| cut(wind, 2), data = USairpollution))
pollution <- with(USairpollution, equal.count(SO2,4))
plot(cloud(precip ~ temp * wind | pollution, panel.aspect = 0.9,data = USairpollution))
Next we consider the quakes dataset of the datasets package
str(quakes)
## 'data.frame': 1000 obs. of 5 variables:
## $ lat : num -20.4 -20.6 -26 -18 -20.4 ...
## $ long : num 182 181 184 182 182 ...
## $ depth : int 562 650 42 626 649 195 82 194 211 622 ...
## $ mag : num 4.8 4.2 5.4 4.1 4 4 4.8 4.4 4.7 4.3 ...
## $ stations: int 41 15 43 19 11 12 43 15 35 19 ...
looking at the first few records of the dataset
head(quakes, 5)
## lat long depth mag stations
## 1 -20.42 181.62 562 4.8 41
## 2 -20.62 181.03 650 4.2 15
## 3 -26.00 184.10 42 5.4 43
## 4 -17.97 181.66 626 4.1 19
## 5 -20.42 181.96 649 4.0 11
The following visualization below include density plot, bivariate plot made using the xyplot function of lattice package, and a 3d scatterplot using the cloud function of the lattice package respectively.
attach(quakes)
plot(density(depth))
plot(xyplot(lat ~ long| cut(depth, 3), data = quakes, layout = c(3, 1), xlab = "Longitude", ylab = "Latitude"))
plot(cloud(depth ~ lat * long | mag, data = quakes, zlim = rev(range(quakes$depth)), screen = list(z = 105, x = -70), panel.aspect = 0.9, xlab = "Longitude", ylab = "Latitude", zlab = "Depth"))
For the library andrews check this link to download the achived package: https://cran.r-project.org/src/contrib/Archive/andrews/
Using all datasets read in from above, we apply the andrews function (a function for making curves for multidimensional data visualization) from the andrews package
library("andrews")
par(mfcol=c(1,1))
andrews(CYGOB1,ymax=1.5)
andrews(quakes,ymax=3)
andrews(USairpollution,ymax=3)
id.pollution = rep(0,dim(USairpollution)[1])
id.pollution[outcity] = rep(2,length(outcity))
cbind(USairpollution,id.pollution)->USairP
attach(USairP)
## The following object is masked _by_ .GlobalEnv:
##
## id.pollution
## The following objects are masked from USairpollution:
##
## manu, popul, precip, predays, SO2, temp, wind
## The following object is masked from package:datasets:
##
## precip
andrews(USairP,type=4,clr=8,ymax=1.5)
stalac function of the MVA package used to detect and identify multivariate outliers and this was applied to the USairpollution data sets.
stalac(USairpollution)
andrews function applied once again to the iris data of the datasets package
data(iris)
andrews(iris,clr=5,ymax=3)
andrews(iris,type=4,clr=5,ymax=2)