# bootstrap.R

#OTHER DATA MINING METHODS

#https://nicercode.github.io/guides/mcmc/
# Draw a reproducible sample of 10000 values from a normal distribution with
# mean m and standard deviation s, then report the sample mean.
m <- 0
s <- 1
set.seed(1)
samples <- rnorm(n = 10000, mean = m, sd = s)
mean(samples)

## [1] -0.006537039

# Distribution of the sample mean across 1000 independent replications.
summary(replicate(n = 1000, expr = mean(rnorm(n = 10000, mean = m, sd = s))))

##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -0.0325000 -0.0057960  0.0004621  0.0004164  0.0067330  0.0355200

# Keep a fresh set of 1000 replicate means and plot their histogram.
total <- replicate(n = 1000, expr = mean(rnorm(n = 10000, mean = m, sd = s)))
hist(total)

# Running mean: element i is the mean of x[1], ..., x[i].
cummean <- function(x) {
  running_total <- cumsum(x)
  running_total / seq_along(x)
}

# Trace the running mean of the draws. The blue reference line at 0 is passed
# as panel.first so it is drawn before the trace.
plot(
  cummean(samples),
  type = "l",
  xlab = "sample",
  ylab = "cumulative mean",
  panel.first = abline(h = 0, col = "blue"),
  las = 1
)

#Cholesky Decomposition
# chol() reads only the upper triangle of its argument and does not check
# symmetry, so the input must be symmetric (and positive definite) for the
# factor to reproduce it. The matrix below is the symmetric matrix defined by
# that upper triangle.
mat1 <- matrix(c(2, 3, 4,
                 3, 6, 7,
                 4, 7, 9), nrow = 3, ncol = 3, byrow = TRUE)
mat1

##      [,1] [,2] [,3]
## [1,]    2    3    4
## [2,]    3    6    7
## [3,]    4    7    9

# Upper-triangular factor R such that t(R) %*% R equals mat1.
cm <- chol(mat1)
cm

##          [,1]     [,2]      [,3]
## [1,] 1.414214 2.121320 2.8284271
## [2,] 0.000000 1.224745 0.8164966
## [3,] 0.000000 0.000000 0.5773503

# Multiplying the factor by its transpose gives back the symmetric matrix that
# chol() factored (chol() uses only the upper triangle of its input).
t(cm) %*% cm

##      [,1] [,2] [,3]
## [1,]    2    3    4
## [2,]    3    6    7
## [3,]    4    7    9

# crossprod(cm) computes the same product, t(cm) %*% cm, in a single call.
crossprod(cm)

##      [,1] [,2] [,3]
## [1,]    2    3    4
## [2,]    3    6    7
## [3,]    4    7    9

#http://blogs.sas.com/content/iml/2012/02/08/use-the-cholesky-transformation-to-correlate-and-uncorrelate-variables.html
# In summary, you can use the Cholesky factor of a covariance matrix in several ways:
#
#   - To generate multivariate normal data with a given covariance structure from uncorrelated normal variables.
#   - To remove the correlations between variables. This task requires using the inverse Cholesky transformation.
#   - To quickly solve linear systems that involve a covariance matrix.


#FFT

#http://www.di.fc.ul.pt/~jpn/r/fourier/fourier.html

# Two pure sine waves sampled on [-2*pi, 2*pi] in steps of pi/100.
xs <- seq(-2 * pi, 2 * pi, pi / 100)
wave.1 <- sin(3 * xs)
wave.2 <- sin(10 * xs)

# Show them side by side, then restore the previous layout so the remaining
# plots in the script are not squeezed into the same 1 x 2 grid.
old.par <- par(mfrow = c(1, 2))
plot(xs, wave.1, type = "l", ylim = c(-1, 1))
abline(h = 0, lty = 3)
plot(xs, wave.2, type = "l", ylim = c(-1, 1))
abline(h = 0, lty = 3)
par(old.par)

# A weighted sum of the two waves gives a more complex wave.
wave.3 <- 0.5 * wave.1 + 0.25 * wave.2
plot(xs, wave.3, type = "l")
title("Eg complex wave")
abline(h = 0, lty = 3)

# Clip the wave at 0.5 to imitate an overflowed (non-linear) signal.
wave.4 <- wave.3
wave.4[wave.3 > 0.5] <- 0.5
plot(xs, wave.4, type = "l", ylim = c(-1.25, 1.25))
title("overflowed, non-linear complex wave")
abline(h = 0, lty = 3)

# 
# Some concepts:
#   
#   - The fundamental period, T, is the period of all the samples taken: the time between the first sample and the last.
#   - The sampling rate, sr, is the number of samples taken over a time period (aka acquisition frequency). For simplicity we will make the time interval between samples equal. This time interval is called the sample interval, si, which is the fundamental period divided by the number of samples N. So, si = T / N.
#   - The fundamental frequency, f0, is 1 / T. The fundamental frequency is the frequency of the repeating pattern or how long the wavelength is. In the previous waves, the fundamental frequency was 1 / (2*pi). The frequencies of the wave components must be integer multiples of the fundamental frequency. f0 is called the first harmonic, the second harmonic is 2 * f0, the third is 3 * f0, etc.

# Overlay the stretch of the wave from -2*pi to 0 in red on top of the full
# wave, with dotted guides at y = 0 and at both ends of that stretch.
repeat.xs <- seq(from = -2 * pi, to = 0, by = pi / 100)
wave.3.repeat <- 0.5 * sin(3 * repeat.xs) + 0.25 * sin(10 * repeat.xs)
plot(xs, wave.3, type = "l")
title("Repeating pattern")
lines(repeat.xs, wave.3.repeat, col = "red")
abline(h = 0, v = c(-2 * pi, 0), lty = 3)

#' Plot a Fourier series over a vector of time points
#'
#' @param fourier.series Function of `(t, w)` returning the value of the series
#'   (a single number) at time `t` for angular frequency `w`.
#' @param f.0 Fundamental frequency; converted to `w = 2 * pi * f.0`.
#' @param ts Numeric vector of time points at which to evaluate the series.
#' @return Invisibly, the numeric vector of series values at `ts`.
plot.fourier <- function(fourier.series, f.0, ts) {
  w <- 2 * pi * f.0
  # vapply() is type-stable: it fails loudly if the series does not return
  # exactly one number per time point, where sapply() could silently hand
  # plot() a list or a matrix instead.
  trajectory <- vapply(ts, function(t) fourier.series(t, w), numeric(1))
  plot(ts, trajectory, type = "l", xlab = "time", ylab = "f(t)")
  abline(h = 0, lty = 3)
  invisible(trajectory)
}

# Example: a single sine wave with f.0 = 1, evaluated on [0, 1].
plot.fourier(
  fourier.series = function(t, w) sin(w * t),
  f.0 = 1,
  ts = seq(from = 0, to = 1, by = 1 / 100)
)

# f(t) = 0.5 * sin(3wt) + 0.25 * sin(10wt):

# Sampling set-up
acq.freq <- 100                                   # data acquisition frequency (Hz)
time <- 6                                         # measuring time interval (seconds)
ts <- seq(from = 0, to = time, by = 1 / acq.freq) # vector of sampling time-points (s)
f.0 <- 1 / time                                   # fundamental frequency (Hz)

# Signal description
dc.component <- 0
component.freqs <- c(3, 10)        # frequency of signal components (Hz)
component.delay <- c(0, 0)         # delay of signal components (radians)
component.strength <- c(0.5, 0.25) # strength of signal components

# Evaluate the signal at time t for angular frequency w: the DC offset plus the
# sum of the sine components. The settings dc.component, component.strength,
# component.freqs and component.delay are looked up outside the function at
# call time, so reassigning them changes the signal.
f <- function(t, w) {
  phases <- component.freqs * w * t + component.delay
  harmonics <- component.strength * sin(phases)
  dc.component + sum(harmonics)
}

# Plot the two-component signal at the sampling time points.
plot.fourier(fourier.series = f, f.0 = f.0, ts = ts)

# Phase shifts are translations in the x-axis for a given wave component. These shifts are measured in angles (radians).
# 
# Taking the previous example and shifting wave.1 by pi/2 we would produce the following Fourier series:
#
#   f(t) = 0.5 * sin(3wt + pi/2) + 0.25 * sin(10wt)

# Give the first component a phase shift of pi/2 and redraw the signal.
component.delay <- c(pi / 2, 0) # delay of signal components (radians)
plot.fourier(fourier.series = f, f.0 = f.0, ts = ts)

# DC Components
# This concept deals with translations over the y-axis. In this case corresponds to an additive constant signal.
# 
# Applying a DC component of -2 to the previous wave would result in the following equation and plot:
#
#   f(t) = -2 + 0.5 * sin(3wt + pi/2) + 0.25 * sin(10wt)

# Add a constant offset of -2 to the signal and redraw it.
dc.component <- -2
plot.fourier(fourier.series = f, f.0 = f.0, ts = ts)

library(stats)
# Dividing by the number of samples normalizes the coefficients.
fft(rep(1, times = 4)) / 4

## [1] 1+0i 0+0i 0+0i 0+0i

fft(seq_len(4)) / 4

## [1]  2.5+0.0i -0.5+0.5i -0.5+0.0i -0.5-0.5i

#' Summarise FFT output as a table of frequency components
#'
#' @param cs Complex vector of FFT coefficients (e.g. the result of `fft()`).
#' @param sample.rate Sampling rate, used to scale the `freq` column.
#' @return A data frame with one row per coefficient: `cycle` (0-based index),
#'   `freq` (`cycle * sample.rate / length(cs)`), `strength` (modulus of the
#'   normalized coefficient, 4 significant digits) and `delay` (its argument in
#'   degrees, 3 significant digits). Empty input gives a zero-row data frame.
convert.fft <- function(cs, sample.rate = 1) {
  n <- length(cs)
  cs <- cs / n # normalize

  # seq_len() instead of 0:(n - 1): for empty input the latter yields c(0, -1)
  # and data.frame() then fails on mismatched column lengths.
  cycle <- seq_len(n) - 1L

  # Mod(), Arg() and signif() are vectorized, so no per-element apply is needed.
  data.frame(cycle    = cycle,
             freq     = cycle * sample.rate / n,
             strength = signif(Mod(cs), 4),
             delay    = signif(180 * Arg(cs) / pi, 3))
}

# Frequency table for the FFT of the sequence 1, 2, 3, 4.
convert.fft(cs = fft(seq_len(4)))

##   cycle freq strength delay
## 1     0 0.00   2.5000     0
## 2     1 0.25   0.7071   135
## 3     2 0.50   0.5000   180
## 4     3 0.75   0.7071  -135

#svm http://joelcadwell.blogspot.in/2016/05/using-support-vector-machines-as-flower.html

library(e1071)
data(iris)

## classification mode
# default with factor response:
model <- svm(Species ~ ., data = iris)

# alternatively the traditional interface (predictors and response passed
# separately). Species is taken from iris explicitly instead of attach()-ing
# the data frame, which would put its columns on the search path where they
# can mask, or be masked by, other objects.
x <- subset(iris, select = -Species)
y <- iris$Species
model <- svm(x, y)

print(model)

## 
## Call:
## svm.default(x = x, y = y)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.25 
## 
## Number of Support Vectors:  51

# summary() repeats the printed fit and adds the support-vector count per
# class, the number of classes and the class levels (see the output below).
summary(model)

## 
## Call:
## svm.default(x = x, y = y)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.25 
## 
## Number of Support Vectors:  51
## 
##  ( 8 22 21 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  setosa versicolor virginica

# Predict on the training data itself ...
pred <- predict(model, newdata = x)
# ... which is the same as taking the fitted values:
pred <- fitted(model)

# Confusion table of predicted vs. actual classes:
table(pred = pred, y = y)

##             y
## pred         setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         48         2
##   virginica       0          2        48

# Ask predict() to attach the decision values, then show them for the first
# four observations:
pred <- predict(model, newdata = x, decision.values = TRUE)
head(attr(pred, "decision.values"), n = 4)

##   setosa/versicolor setosa/virginica versicolor/virginica
## 1          1.196152         1.091757            0.6708810
## 2          1.064621         1.056185            0.8483518
## 3          1.180842         1.074542            0.6439798
## 4          1.110699         1.053012            0.6782041

# Visualize the data in two dimensions via classical MDS: colour encodes the
# class, and support vectors are drawn as "+" (all other points as "o").
plot(
  cmdscale(dist(iris[, -5])),
  col = as.integer(iris[, 5]),
  pch = c("o", "+")[(seq_len(150) %in% model$index) + 1]
)

# bootstrap.R
#
# Dell
#
# Sun Jan 15 10:50:59 2017