“The greatest value of a picture is when it forces us to notice what we never expected to see.” John W. Tukey, Exploratory Data Analysis, 1977
woolA = warpbreaks[warpbreaks$wool=="A",]
stem(woolA$breaks,)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 1 | 0257888
## 2 | 114566689
## 3 | 00566
## 4 | 3
## 5 | 124
## 6 | 7
## 7 | 0
hist(woolA$breaks)
par(mfrow=c(1,2),mar=c(0,4,2,2)) # bottom, left, top , right
boxplot(Nile[time(Nile)<1898],main="Before Aswan Dam",
notch = T, ylim= c(600,1400),col = "lightyellow")
boxplot(Nile[time(Nile)>=1898],main="After Aswan Dam",
notch = T ,ylim= c(600,1400),col="lightgrey")
library(wesanderson)
curve(2*((x+1)^(1/2)-1),0,5,col= wes_palette(n=5, name="Darjeeling1")[2],
lwd=2, main = "Ladder of Power", xlab = "X",ylab="Y", ylim = c(0,3.0))
curve(3*((x+1)^(1/3)-1),0,5,col = wes_palette(n=5, name="Darjeeling1")[3],add=T,lwd=2)
curve(log(x+1),0,5,col=wes_palette(n=5, name="Darjeeling1")[1],add=T,lwd=2)
curve(-3*((x+1)^(-1/3)-1),0,5,col=wes_palette(n=5, name="Darjeeling1")[4],add=T,lwd=2)
curve(-2*((x+1)^(-1/2)-1),0,5,col=wes_palette(n=5, name="Darjeeling1")[5],add=T,lwd=2)
legend(0.2,2.8,c("Square Root", "Cubic Root", "Log", "1/Cube Root", "1/Square Root"),
col=wes_palette(n=5, name="Darjeeling1")[c(2,3,1,4,5)],pch=1)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
stock.data = getSymbols('005930.KS',from='2020-01-01',to='2020-12-31',auto.assign = FALSE)
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
colnames(stock.data) = c("open","high","low","close","volume","adjusted")
stock.data = stock.data[stock.data$volume>0,]
plot(stock.data$close,main = "Time-Series Plot of Stock Price")
par(mfrow=c(1,2))
boxplot(stock.data$close,main = "Boxplot of Stock Price")
plot(density(stock.data$close), main = "Desnity Plot of Stock Price")
par(mfrow= c(1,2))
boxplot(stock.data$close, main = "Original Data")
boxplot(log(stock.data$close),main = "P = 0 Transformation")
par(mfrow=c(2,2), mar = c(4,4,2,4))
set.seed(1)
sp = rnorm(50,mean = 1, sd = 2)
sp = sp[order(sp)]
qn = qnorm(seq(from=0.01,to=0.99,length=50))
plot(ecdf(sp), main = "Empirical CDF vs Theoretical CDF");lines(seq(-2,2,length=50),pnorm(seq(-2,2,length=50)),col="red")
plot(qn, sp, main = "Normal Q-Q Plot", xlim = c(-4,4),ylim = c(-4,4)); abline(0,1,col="red") ; abline(1,2,col="blue")
plot(pnorm(sp),seq(0.02,1,by=0.02), main = "Normal P-P Plot with mu = 0 , sd = 1"); abline(0,1, col="red")
plot(pnorm(sp,mean=1, sd=2),seq(0.02,1,by=0.02), main = "Normal P-P Plot with mu = 1 , sd = 2"); abline(0,1, col="red")
require(graphics)
## Deaths from sport parachuting; from ABC of EDA, p.224:
deaths = rbind(c(14,15,14),
c( 7, 4, 7),
c( 8, 2,10),
c(15, 9,10),
c( 0, 2, 0))
dimnames(deaths) <- list(c("1-24", "25-74", "75-199", "200++", "Not Reported"),
paste(1973:1975))
deaths
## 1973 1974 1975
## 1-24 14 15 14
## 25-74 7 4 7
## 75-199 8 2 10
## 200++ 15 9 10
## Not Reported 0 2 0
(med.d <- medpolish(deaths))
## 1: 19
## Final: 19
##
## Median Polish Results (Dataset: "deaths")
##
## Overall: 8
##
## Row Effects:
## 1-24 25-74 75-199 200++ Not Reported
## 6 -1 0 2 -8
##
## Column Effects:
## 1973 1974 1975
## 0 -1 0
##
## Residuals:
## 1973 1974 1975
## 1-24 0 2 0
## 25-74 0 -2 0
## 75-199 0 -5 2
## 200++ 5 0 0
## Not Reported 0 3 0
plot(med.d)
## Coded Plot
CodedPlot = function(x){
result = x
criteria = fivenum(x)
hspread = criteria[4]-criteria[2]
outerU = criteria[4]+3*hspread
outerL = criteria[2]-3*hspread
innerU = criteria[4]+1.5*hspread
innerL = criteria[2]+1.5*hspread
for(i in 1:ncol(x)){
for(j in 1:nrow(x)){
if(x[j,i]>outerU) result[j,i] = "P"
else if(x[j,i]>innerU) result[j,i] = "#"
else if(x[j,i]>criteria[4]) result[j,i] = "+"
else if(x[j,i]>criteria[2]) result[j,i] = "."
else if(x[j,i]>innerL) result[j,i] = "-"
else if(x[j,i]>outerL) result[j,i] = "="
else result[j,i] = "M"
}
}
return(result)
}
CodedPlot(deaths)
## 1973 1974 1975
## 1-24 "+" "+" "+"
## 25-74 "." "." "."
## 75-199 "." "=" "."
## 200++ "+" "." "."
## Not Reported "=" "=" "="
Introduction to Data Science by Rafael A. Irizarry : Chapter 28.Smoothing
library(scatterplot3d)
attach(iris)
scatterplot3d(Sepal.Length, Sepal.Width, Petal.Length)