# Excercise report by Jacob Mars s113812 and Sissel Trap S112195.

# Day 1
# Excercise 1

#############
# Step 1
# Open the R studio.
# Check
# Step 2
# Type some expamples, matrices
x = matrix(nrow=3,nc=4,data=1:12)
# Step 3
# Save the command as a file with R. extension
# Check
# Step 4
# Install the knitr by using command:
# install.packages('knitr')
# Step 5
# Execute commands, use various buttons. Check
# Step 6
# Check the content of memory and recorded data by using funktion
ls()

## [1] "x"

# Step 7
# Removing all the objects by executing command
remove(list=ls())
# Step 8
# Save the session as a html notebook.
# Looks very nice indeed!
# Link : file:///C:/Users/Mars/Dropbox/Stokastisk%20simulation%2002443/Lektion1/Excercise1.html

###############
#Excercise 2
# Step 1
# Vector x is defined as: 
x=1:10
# Step 2
# Vector y is defined as the first 5 values of x
y=x[1:5]
# Step 3
# Vector z is defined as every second value of x
# Here the function seq(from=..,to=..,by=..) is being used, with from and to which defines the start and the end on vector x.
z=seq(from=x[1],x[10],by=2)
# Step 4
# Make the matrix x
x = matrix(nr=4,nc=5,data=1:20)
# 4 rows, 5 columns with 20 values (4*5=20), nrow(x),ncol(x), dim(x)
# Then we add 10 to row 1
x[1,]=x[1,]+10
# Then we add 20 to the column 3
x[,3]=x[,3]+20
# Step 5
# Defines y as the "transposed" matrix of x
y=t(x)
# Step 6
# Defines z as the produkt of the 2 matrices. Calculate x and y (%*%) and not by *.
z=x%*%y
# Step 7
# Read the help of read.table
# This function reads the file in table format, and creates a dataframe from it
# Step 8
# Create dataframe from the internetfile, using the functions, header, na.strings and row.names
w=read.table(file="http://www2.imm.dtu.dk/courses/02443/slides2013/data/data_ex2.txt",header=TRUE,na.strings="-999")

## Warning: cannot open: HTTP status was '404 Not Found'

## Error: kan ikke åbne forbindelsen

# There is an error because the file has been removed!!!
# row.names function defines the name of the rows
# na.strings creates a vector of strings, which are to be intepreted as NA value (-999)
# Header function shows if the file cantains names of the variables af is alway false or true
# Checking if w is defines as a dataframe
is.data.frame(w)

## Error: object 'w' not found

# TRUE . Yes, its a dataframe
# Step 9
# Correcting the name in 3 column, by using the function
colnames(w)=c("Name","Math","Physics","Chemistry","Global")

## Error: object 'w' not found

# Step 10
# Compute the mean by subjects (colums)
# The value NA can't be computed, and will need to be ignored when calculating the mean
# The same goes for column 5, which contains letters and needs to be numeric.
# Therefor, we make a new matrix, w1, excluding the names and letters.
w1=w[1:4,2:4]

## Error: object 'w' not found

# Now we can compute the mean of the columns and the rows. 
# The command na.rm=TRUE, indicates, that if there is a NA-value, it should be neglected from the computation
# Compute the mean by subjects (columns)
colMeans(w1,na.rm=TRUE)

## Error: object 'w1' not found

# Compute the mean by studens (rows)
rowMeans(w1,na.rm=TRUE)

## Error: object 'w1' not found

# Step 11
# Create a vector of logicals with the lenght of 4 containing for the grades > 10
# The physicsgrades are placed in the 3. column of matrix w. 
# We therefor make a new vector (t), which containts logical expressions i.e TRUE or FALSE for values > 10.
t=w[,3]>10

## Error: object 'w' not found

# Step 12
# Create a 4x3 metrix of logials containing TRUE for grades > 10.
# If we check for values > 10 in the original matrix w, we recieve a warning that it contains non logical expressions
# This is because letters (names) cannot be compared to numbers. We can however factor or assign number to letters if needed.
# We therefor run the function on the matrix w1 (previously used), containing only column 2,3 and 4 of matrix w.
r=w1>10

## Error: object 'w1' not found

#############
# Excercise 3.1
# Create a vector containing the 100 first numbers of the Fibunacci sequence
Fibonacci <- numeric(100) # This is the first 100 numeric values of Fubinacci.
Fibonacci[1] <- Fibonacci[2] <- 1 # Setting the value of the first and the second Fubinacci number as 1.
# The subsequent elements are defined as the sum of the preceding two elements. Now we can use the for() to write a loop:
for (i in 3:100) {Fibonacci[i] <- Fibonacci[i - 2] + Fibonacci[i - 1]}
Fibonacci

##   [1] 1.000e+00 1.000e+00 2.000e+00 3.000e+00 5.000e+00 8.000e+00 1.300e+01
##   [8] 2.100e+01 3.400e+01 5.500e+01 8.900e+01 1.440e+02 2.330e+02 3.770e+02
##  [15] 6.100e+02 9.870e+02 1.597e+03 2.584e+03 4.181e+03 6.765e+03 1.095e+04
##  [22] 1.771e+04 2.866e+04 4.637e+04 7.502e+04 1.214e+05 1.964e+05 3.178e+05
##  [29] 5.142e+05 8.320e+05 1.346e+06 2.178e+06 3.525e+06 5.703e+06 9.227e+06
##  [36] 1.493e+07 2.416e+07 3.909e+07 6.325e+07 1.023e+08 1.656e+08 2.679e+08
##  [43] 4.335e+08 7.014e+08 1.135e+09 1.836e+09 2.971e+09 4.808e+09 7.779e+09
##  [50] 1.259e+10 2.037e+10 3.295e+10 5.332e+10 8.627e+10 1.396e+11 2.259e+11
##  [57] 3.654e+11 5.913e+11 9.567e+11 1.548e+12 2.505e+12 4.053e+12 6.557e+12
##  [64] 1.061e+13 1.717e+13 2.778e+13 4.495e+13 7.272e+13 1.177e+14 1.904e+14
##  [71] 3.081e+14 4.985e+14 8.065e+14 1.305e+15 2.111e+15 3.416e+15 5.528e+15
##  [78] 8.944e+15 1.447e+16 2.342e+16 3.789e+16 6.131e+16 9.919e+16 1.605e+17
##  [85] 2.597e+17 4.202e+17 6.799e+17 1.100e+18 1.780e+18 2.880e+18 4.660e+18
##  [92] 7.540e+18 1.220e+19 1.974e+19 3.194e+19 5.168e+19 8.362e+19 1.353e+20
##  [99] 2.189e+20 3.542e+20

#Excercise 3.2
remove(list=ls())
# Write a function returning the real roots of a second order polynomial:
# f=a*x^2+b*x+c where as the constants are stored as:
a=10; b=2; c=-5
# The roots of a second order polynomial, can be determined by the quadratic function:
# a*x^2+b*x+c=0
# The quadratic equation is a second-order polynomial equation, so it has two solutions (x1,x2) which can be found:
# x1=(-b-sqrt(b^2-4*a*c))/2*a
# x2=(-b+sqrt(b^2-4*a*c))/2*a
# However, we can use the function "uniroot()" to find the roots of the function.

f <- function (x)
{ return ( a*x^2+b*x +c)}

plot(f)

plot of chunk unnamed-chunk-1

# Find the root , and a lot more information .
# We can now find the roots, using the uniroot()-function, defining an interval from 0 to 100.
TheRoot <- uniroot(f, c(0 , 100));
TheRoot $ root # Returns the numerical value of the second order polynomial.

## [1] 0.6141

#############
# Excercise 4
# 4.1 Load the file as a data.frame
Stock=read.table("http://www2.imm.dtu.dk/courses/02443/projects/data/Eurostoxx50.txt")
# Now the data is stored as "Stock".
# 4.2
#Plot the variation of the prices as a function of time on the same plotting window:
plot(Stock,main='Stock prices') # This will result in a matrix scatterplot.

plot of chunk unnamed-chunk-1

# The coloumns 2,3 and 4 and different stock prices as a function of time. 
Stock1=Stock[,2]
Stock2=Stock[,3]
Stock3=Stock[,4]
# Now we can plot the 3 different stocks:
plot(Stock1,main='Stock Prices', xlab='Time', ylab='Price',las=1,cex=0.5,col=2,xlim=c(0,1500),ylim=c(0,50))
lines(Stock2,main='Stock Prices', xlab='Time', ylab='Price',las=1,cex=0.5,col=3)
lines(Stock3,main='Stock Prices', xlab='Time', ylab='Price',las=1,cex=0.5,col=4)
# Now we will add the legend
labels <- c("Stock1","Stock2","Stock3")
legend("topright",inset=0.05,title="Stock Prices",labels, lwd=2, col=c("red","green","blue"))

plot of chunk unnamed-chunk-1

##############
#Excercise in pseudo RNG simulation
remove(list=ls())
# The congruence method
a=16807 ; b=0 ; M=2147483647
n = 1000 
# Number of iterations (simulates 1000 realisations on [0,1]) - the higher number n, the more precise. 
# The number of times it go into M
x = numeric(length=n)
x[1] = 3
for(i in 2:n)
{  x[i] = (a*x[i-1] + b) %% M }
x = x/M
# Make somc checks of ramdomness
# 1) Insert x into a histogram and insert a pink line to show the mean
hist(x,prob=TRUE)
u = seq(0,1,0.01)
lines(u,dunif(u),col=6)

plot of chunk unnamed-chunk-1

# 2) Illustrade graphically the distribution by a scatter of (Xk, Xk+2)
# Scatter plots are appropriate for examining the relationship between 2 numeric variables
# We displace displace the function (x) by 3, to check for any patterns
plot(x[1:998],x[3:1000],main='Scatterplot',xlab='x',ylab='X',las=1,cex=0.5,col=2)

plot of chunk unnamed-chunk-1

# 3) Plot the auto-correlation function
# This shows the similarity betweem observations as a function of time lag between them. 
acf(x,main='Auto-correlation Plot',las=1, ylim=c(-0.1,0.1))

plot of chunk unnamed-chunk-1

# We check the plot for correlation. Concluding that the correlation is very small, about 0.02.
# 4) Statistical test of the hypothesis that the simulated values are i.i.d. realisations of a U[0,1] distribution.
# Test if RNG pass the KS test
# This test is non-parametric and distribution free, and test if 2 datasets differ significantly.
# D is defined as the maximum vertical deviation between the two curves.
# The P-value measures how they differ, and then if the data conflicts with H0. Reject the null-hypothetis if small p-value.
ks.test(x,"punif",alternative="two.sided")

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  x
## D = 0.043, p-value = 0.04975
## alternative hypothesis: two-sided

# This test results in a very small p-value, which leads to a rejection of H0.
# We therefore try and run more simulations (higher n-value), in order to have a better basis for decision
n1=30000
x = numeric(length=n1)
x[1] = 3
for(i in 2:n1)
{  x[i] = (a*x[i-1] + b) %% M }
x = x/M
ks.test(x,"punif",alternative="two.sided")

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  x
## D = 0.0043, p-value = 0.6487
## alternative hypothesis: two-sided

# We can now conclude, that this sample comes from a uniform distribution
# Repeat different steps with different values of the parametres a, b and M
# b is a constant used in the equation  x[i] = (a*x[i-1] + b) %% M.
# By entering a value b=12345, results in a p-value=0.91
# a is the the multiplier in the equation. Changing a, can result in major erros running in functions and plots.
# Modulus, as previously mentioned, needs to be large enough to avoid discretization issues, but no larger than the limit.

##################
remove(list=ls())
# Day 2 - Excercise 1
# Exercise 1.1
# Simulate a sample from a Cauchy(0,1) distribution with the inversion method, given density f and fcdf.
n=1000
f=function(x)
{1/(pi)*(1+x^2)}
# cdf 
fcdf=function(x)
{1/2+1/(pi)*arctan(x)}
# The inverse of the cdf is found by standard mathematical procedure.
Fnif=function(x)
{tan(pi*(x-1/2))}
x<-runif(n=1000,0,1)
# Simulate 1000 numbers from the distribution and store them as the function:
y= Fnif(x)
# Plot the empirical histogram of the cauchy distributet values
hist(y[abs(y)<20],breaks=100,prob=TRUE,col=5,main='Histogram',las=1)
# We can insert the distribution as points, to see that pattern centerede around the middle.
points(y,rep(0,n),col=4)
# Compare to the target density, we see some kind of overlab.
# Picking a interval between -20 and 20, because it seemed fitting (fewer points futher out).
z=seq(-20,20,0.1) # Making a sequence in the interval [-20:20] with 0.1 interval.
zy=dcauchy(z,0,1) # Density distribution from the given Cauchy parametres and interval.
points(z,zy,col=10)

plot of chunk unnamed-chunk-1

############
rm(list=ls())

# Exercise 1.2
# Uniform method
# Simulate a sample from a Beta(2,5) distribution using the rejection method with a
# uniform distribution as auxiliary distribution.
# With the rejection method you find a function which is "bigger" than the function  that you want to find.
# q is the vektors of quantiles.
N=10000
x=runif(1000) 
u=runif(1000)
# We want to define a constant c, which is set as low as possible, in order to not get too many "unnecessary" points.
c=3
# We are now going to find a random point on the x- and y-axsis.
# The rescaled auxcility density is defined as S.
s=c*u
# Now we want to check if the point is within the graph, i.e if c*g(x)<=f(x).
acc = s<=dbeta(x,2,5)
plot(x[acc],s[acc],col=1) # Showing black dots within the limits of the function.
points(x[!acc],s[!acc],col=2) # Show red dots above the limits of the function.

plot of chunk unnamed-chunk-1

hist(x[acc],breaks=100,prob=TRUE,col=5,main="Uniform Method",xlab='X',ylab='Y',las=1) # Histogram plot
lines(seq(0,0.7,0.01),dbeta(seq(0,0.7,0.01),2,5),col=6,'l',las=1) # Adding a line to the plot.

plot of chunk unnamed-chunk-1

##############
rm(list=ls())

# Exercise 1.3 
# Simulate a sample from an N(0,1) distribution with the rejection method using
# the double exponential distribution g(x) = (alfa)/2exp(-alfa)x) as auxiliary variable.
n=1000 # Number of observations
# The X-axis is again uniform and the y-axis is expotential. 
z=runif(n,0,1) # Generating n random deviates in the interval [0,1]
# Selecting the middel of the interval (0,5) and multiplice the half with -1 and the other half with 1, thereby achieving a double exponential distribution.
z[z<0.5]<- -1
z[z>0.5]<- 1
alpha = 1
# Defines x and y
x <- z*rexp(n,1)
y <- runif(n,0,(alpha)/2*exp(-alpha*abs(x))*2)
acc <- y < dnorm(x,0,1)
plot(x[acc],y[acc],col=1,xlim=c(-6,6),ylim=c(0,1))
points(x[!acc],y[!acc],col=2)

plot of chunk unnamed-chunk-1

#############
rm(list=ls())
# Exercise 1.4
# One throws a needle of length l at random on a parquet floor with planck width l
# Study by simulation this experiment and estimate the probability of the needle to intersect a line between two planks?
l=1 # Lenght of the needle.
n=10000 # Number of throws.
# We have 2 variables; A which is the angle of the needle, and x which is the initial position of the needle.
A=runif(min=0, max=(pi),n=10000) # angle of the needle from [0:pi] because of the symmetri.
x=runif(min=0,max=l,n=10000) # Interval of the plank, which the needle can land on.
y=(sin(A)*l/2) # Length of the needle on the x-axis.
# Now we have the functions and critera for a model
# We can now make a loop to test every possible outcome, and count how many times the needel tuch the line.
count=0 # Setting our initial count
for(i in 1:n){if(y[i]>=x[i]){count=count+1}}
# {} defines the region of the loop for each y >= x (needle crosses or touches the line)
count #Display the number of times the needle intersect the line between the planks.

## [1] 3250

# We can now find the probability by taking the total amount of simulations, divided by the times the needles intersected (count)
n/count

## [1] 3.077

# We can see that the needle intersect the line on every n/count try.
# This is very close to pi, assuming that if we could increase the number of simulations, the probability would come closer to the value of pi.

##########
rm(list=ls())

# Exercise II - 5
# You want to sell your car. You receive a first offer at a price of 12 (say in K Euro) Since it
# is the first offer, you decide to reject it and wait a little bit to \smell" the market.
# You reject offers and wait until you get the first offer strictly larger than 12.
# Study by simulation the waiting time (counted in number of offers untill the sell). 
# You can for instance estimate the expectation of the waiting time. Assume that the Xi
#s are independant and identically distributed Poisson(10)
#. Consider the solution conditionally on X1 = 12
# then unconditionally (i.e. in
# average over all possible X1 values under the assumption that the Xi's are i.i.d
# Poisson(10)). The use of rpois is allowed in this exercise.
# Estimate the expectation of waiting time.
# We use the rpoist() function to simulate n independant Poisson random variables
n=1000
v<-rpois(n,10) # Generates n random numbers from the Poisson distribution.
# Can locate the numbers less than or equal to 12, and label them as false.
v[1:1000]<=12

##    [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
##   [12] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##   [23]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##   [34]  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
##   [45]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##   [56]  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
##   [67]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE
##   [78]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##   [89]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE
##  [100] FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE
##  [111]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
##  [122]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [133] FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE
##  [144] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
##  [155]  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [166]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
##  [177]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [188]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
##  [199]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
##  [210]  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE
##  [221]  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE
##  [232]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
##  [243] FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE
##  [254] FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [265]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [276] FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [287]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [298]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [309]  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE
##  [320]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE
##  [331] FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [342]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE
##  [353] FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE
##  [364]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [375]  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
##  [386]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [397]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
##  [408]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [419] FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [430]  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [441]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [452]  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [463]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [474]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [485]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [496]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [507]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [518] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [529]  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
##  [540]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [551]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [562] FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [573] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [584]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
##  [595]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
##  [606]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE
##  [617]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE
##  [628]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
##  [639]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [650]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [661]  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE
##  [672]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [683]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
##  [694]  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [705]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [716]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [727]  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
##  [738]  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [749]  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
##  [760]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
##  [771]  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
##  [782] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
##  [793] FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [804]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE
##  [815]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [826]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [837]  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [848]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
##  [859] FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE
##  [870]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [881] FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE
##  [892]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [903]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [914]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [925]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
##  [936]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [947]  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [958]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [969] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [980]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [991]  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE

# We can then call the function to locate the position of the buyer offering 12k or more:
which((v[1:1000]<=12)==FALSE)

##   [1]  10  12  25  30  36  39  49  51  58  61  65  72  74  75  81  98  99
##  [18] 100 102 105 107 108 109 110 120 126 132 133 138 140 144 151 152 153
##  [35] 154 157 158 161 168 175 179 184 192 196 207 212 215 217 222 224 225
##  [52] 227 230 234 239 240 243 247 248 251 253 254 256 261 267 275 276 279
##  [69] 301 304 311 314 316 317 318 328 330 331 333 345 348 350 353 356 359
##  [86] 360 363 376 379 380 381 388 392 404 405 409 418 419 423 432 433 436
## [103] 445 451 453 455 465 469 499 506 510 518 530 531 534 535 546 562 565
## [120] 568 573 579 586 593 604 611 614 623 627 632 637 644 656 663 664 669
## [137] 671 673 690 692 695 698 699 707 730 731 736 739 740 753 754 758 768
## [154] 769 772 775 776 780 782 789 791 793 794 803 810 813 822 829 833 838
## [171] 841 851 857 859 861 864 867 871 881 884 886 888 890 898 927 932 933
## [188] 939 949 951 952 959 964 969 986 992 993 997

# We can then use the diff-function to find number of costumers between the buyers offering 12k or more.
diff(which((v<=12)==FALSE))-1

##   [1]  1 12  4  5  2  9  1  6  2  3  6  1  0  5 16  0  0  1  2  1  0  0  0
##  [24]  9  5  5  0  4  1  3  6  0  0  0  2  0  2  6  6  3  4  7  3 10  4  2
##  [47]  1  4  1  0  1  2  3  4  0  2  3  0  2  1  0  1  4  5  7  0  2 21  2
##  [70]  6  2  1  0  0  9  1  0  1 11  2  1  2  2  2  0  2 12  2  0  0  6  3
##  [93] 11  0  3  8  0  3  8  0  2  8  5  1  1  9  3 29  6  3  7 11  0  2  0
## [116] 10 15  2  2  4  5  6  6 10  6  2  8  3  4  4  6 11  6  0  4  1  1 16
## [139]  1  2  2  0  7 22  0  4  2  0 12  0  3  9  0  2  2  0  3  1  6  1  1
## [162]  0  8  6  2  8  6  3  4  2  9  5  1  1  2  2  3  9  2  1  1  1  7 28
## [185]  4  0  5  9  1  0  6  4  4 16  5  0  3

# Finally, we can now calculate the average waittime between buyers offering 12k or more, by taking the mean.
mean(diff(which((v<=12)==FALSE))-1)

## [1] 4.01

##############
rm(list=ls())
#Execise II - 6
lambda=10
n=200
# Simulate a single dataset of n=200 observations from a Poisson distribution with parametre 10.
x=rpois(n,lambda)
# We can estimae the theoretical median from the sample by using the function:
y=median(x,na.rm=FALSE);y

## [1] 10

# Bootstrapping is a method, which lets us compute estimated confidens interval and standard errors.
# We are now able to resample a given set of data, x, by using the sample function.
sample(x,n,replace=FALSE)

##   [1]  7 15 13 11 11  9  7  8 13  7 10  7 13 12  8  8 12  6 13 11 18 12 12
##  [24] 10 13 11  9  9 14  5  7  8  7  8 13 10 12 15 14  4  9  5 13  7  9  6
##  [47] 10 10 10  9  9  6  6  8  9 15 15  9 16  7 12  8 15  8 13 11  8 19  6
##  [70] 12 11 12  8 10 12  8  9  9 10  6 10 12 10  7  7 10 11  7 15 13 13  5
##  [93] 13  9  9 13 13 10  9 16 11 10  9 12  9 10  9 11 12 11  9 12  7  6 11
## [116] 10 11 12  8 14  6 13 12 11 13  7 14 10 14 10 12 12 11  9 18  8  8 11
## [139] 11  8 10 10 10 14 12 11 10 14  7 11  6 16 18 12  9  8  9  9  6 10  6
## [162]  7 12  8 15  7  8 12  8  6 12  6 10  8 11  9 10 14 13 13 12  6 13 19
## [185] 10 12 12 10 11  7 13 11 10  9 13 12 11  8  3 10

# x is the a vector containing the data. n is the size of the population being resampled. Replace determines that the sample drawn, will not be replaced.
# We can now obtain some bootstrap samples, from the orgininal poisson distributed sample.
resamples <- lapply(1:200, function(i)
  sample(x, replace = T))
# We have made 200 resamples from the original random poisson distribution. Let's display the first:
resamples[1]

## [[1]]
##   [1] 13 10 13  7  7 10 14 12 10 12 10  9 12 11  8 11  8 10 11 10 15 10 15
##  [24] 10 12  8  9 11 12 12 12 12 18 11 13 12 12  7 10  9  9 11  7 13  6  8
##  [47]  6  7 10 12  8  6  7  6 11 14 13 10  9 13 13 15 10 14  8 10  8 10  7
##  [70] 16  8  6 15 10 12 16  5  9 10  7 14 13 19 12 18 10  8 10 10  9  7 15
##  [93]  9 12  6 16  8 10 13  6 10 13 12  8 14  9 12  8  6  6  8  4  6  8 12
## [116]  7  8  6  9  9 10 12 13  8 13 11 10  9 11 10 11 10 12 10 14 12 12  7
## [139] 13  8  8  9 15 11  8 10 15 10 13 10 13  3  9 18 12  9 18  8  9  8 12
## [162] 11 10 12  7  9  7  8 10  9  7 13  8 11 13  6 12 19 10 15 16 12 15 12
## [185]  8 10 15  6 10  6 19 12  6  9  8  7 13 11 10  7

# We can now calculate the median for each bootstrap sample, a total of 200 samples:
r.median <- sapply(resamples, median)
r.median

##   [1] 10.0 10.0 10.0 11.0 10.0 10.0 11.0 10.0 10.0 10.0 10.0 10.0 10.0 10.5
##  [15] 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 11.0 10.0
##  [29] 10.0 10.0 10.0 10.5 11.0 10.0 10.0 11.0 10.0 10.0 10.0 10.0 10.0 10.5
##  [43] 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.5 10.0
##  [57] 10.0 10.0 11.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 11.0 10.0
##  [71] 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 11.0 10.0 10.0 10.0
##  [85] 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0
##  [99] 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.5 11.0 10.0 10.0
## [113] 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 11.0 10.0 11.0 10.0 10.0 10.0
## [127] 10.0 10.0 10.0 10.0 10.0 10.0 10.0 11.0 10.0 10.0 10.0 10.0 10.0 10.0
## [141] 10.0 10.0 10.0 10.0 10.0 11.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0
## [155] 10.0 10.0 10.0 10.0 10.0 10.5 10.0 10.0 11.0  9.0 10.0 10.0 10.0 10.0
## [169] 10.0 10.0 10.0 10.0 10.0 10.0 10.0 11.0 10.0 10.0 10.0 10.0 10.0 10.0
## [183] 11.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 10.0 11.0 10.0
## [197] 10.0 10.0 11.0 10.0

mean(r.median) # Showing the mean of the medians of the bootstrap samples

## [1] 10.1

# We can now calculate the varians and standard deviation of the distributions of means
sqrt(var(r.median))

## [1] 0.3049

# Let's display the distribution of medians by a histogram to get a good overview
hist(r.median,main='Histogram of median',xlab='Median',ylab='Frequency',las=1,col=2)

plot of chunk unnamed-chunk-1

# We can conclude that the standard deviation is very small. We can also see in the histogram, 

###########
rm(list=ls())
# Exercise II - 7
#Simulate a sample of size 50000 from a bivariate centred, standardized Gaussian vector
#with correlation coefficient p=0.7. Plot this sample. Extract a sample of a random
#variable approximately distributed as Y2jY1 = :5. Estimate its mean and variance. Plot its
#empirical histogram and compare it to a normal density with same parameters as the one simulated above
# First we are going to define the target density of the distribution
n=50000
sd=1
rho=0.7
Sigma=matrix(nr=2,nc=2,data=c(sd,rho,rho,sd))
Sigma

##      [,1] [,2]
## [1,]  1.0  0.7
## [2,]  0.7  1.0

# Next step is to use Choleski factorisation, which transforms it into a lower triangular matrix (density form) and its conjugate transpose.
U = chol(Sigma) ; L = t(U)
L

##      [,1]   [,2]
## [1,]  1.0 0.0000
## [2,]  0.7 0.7141

##      [,1]   [,2]
## [1,]    1 0.7000
## [2,]    0 0.7141

L %*% U ## Checking if the produkt is equal to Sigma

##      [,1] [,2]
## [1,]  1.0  0.7
## [2,]  0.7  1.0

# We can now try to simulate 2 sample sizes from a normal random distribution:
x1 = rnorm(n) ; x2 = rnorm(n)
X = rbind(x1,x2)
Y = L %*% X
Y = t(Y)
# Let's try and plot the samples to get an overview.
plot(Y,asp=1,main='Sample plot',las=1,cex=1,col=2)
library(MASS)
points(x1,x2,asp=1,cex=1,col=3)
contour(kde2d(x1,x2),asp=TRUE,theta=20,phi=30,add=TRUE)

plot of chunk unnamed-chunk-1

# Extract a sample of random variable approximately distributed as Y2|Y2=0.5.
plot(Y,asp=1,main='Sample plot',las=1,cex=1,col=2)
abline(v=.5,col=1,lty=1)

plot of chunk unnamed-chunk-1

# The sample needs to be distributed from the line Y2|Y2=0.5.
dim(Y) # Displays the dimension of the matrix nrow=50.000, ncol=2.

## [1] 50000     2

# We wish to search for the values of Y2 in coloumn 2, equals to Y1=0.5.
# We need to construct an interval, since there are no values excactly equal 0.5:
Y1<-Y[Y[,1]>0.495 & Y[,1]<0.505,]
Y1 # So we have the number if points equal approximately distributed as Y2|Y2=0.5.

##          [,1]      [,2]
##   [1,] 0.4964  0.854954
##   [2,] 0.5027  0.725253
##   [3,] 0.5014  0.311471
##   [4,] 0.4962  0.701781
##   [5,] 0.5035 -0.577166
##   [6,] 0.5011  0.515152
##   [7,] 0.4990 -0.871534
##   [8,] 0.4989  0.562236
##   [9,] 0.5007 -1.388096
##  [10,] 0.4991  2.098927
##  [11,] 0.5049 -0.307226
##  [12,] 0.4985  2.125996
##  [13,] 0.5030  0.136844
##  [14,] 0.5042 -0.190441
##  [15,] 0.4992  1.142735
##  [16,] 0.5028  1.142723
##  [17,] 0.4984  0.712655
##  [18,] 0.4976  0.669138
##  [19,] 0.5023  1.346354
##  [20,] 0.5012  0.453402
##  [21,] 0.5033 -0.210814
##  [22,] 0.4967 -1.108224
##  [23,] 0.4984  0.297835
##  [24,] 0.4952 -0.221610
##  [25,] 0.5002  0.763923
##  [26,] 0.5007 -0.168062
##  [27,] 0.5009 -0.081717
##  [28,] 0.4969  0.930654
##  [29,] 0.5017 -1.156841
##  [30,] 0.4995  0.142039
##  [31,] 0.5028 -1.149211
##  [32,] 0.5036  0.236041
##  [33,] 0.4991  0.983963
##  [34,] 0.4954  0.812913
##  [35,] 0.4975 -0.654391
##  [36,] 0.4989  0.443274
##  [37,] 0.4989  2.286942
##  [38,] 0.5042 -1.129692
##  [39,] 0.5015  0.667935
##  [40,] 0.5002 -0.177691
##  [41,] 0.5005  0.821922
##  [42,] 0.5004 -0.034824
##  [43,] 0.5047  0.295374
##  [44,] 0.4979  0.572542
##  [45,] 0.5015  0.114170
##  [46,] 0.4991 -0.166782
##  [47,] 0.4958 -0.348329
##  [48,] 0.5011  1.129853
##  [49,] 0.5021  0.349858
##  [50,] 0.4951  0.774412
##  [51,] 0.5005 -0.605938
##  [52,] 0.4987 -0.299387
##  [53,] 0.4964  0.562415
##  [54,] 0.5022  0.597230
##  [55,] 0.4955  0.223708
##  [56,] 0.5013 -0.351459
##  [57,] 0.4991 -0.687432
##  [58,] 0.5040  0.909582
##  [59,] 0.4994  0.635601
##  [60,] 0.5041 -0.471536
##  [61,] 0.5004  0.445324
##  [62,] 0.5049 -0.086720
##  [63,] 0.4996 -0.225460
##  [64,] 0.5011  0.035945
##  [65,] 0.4953  0.789276
##  [66,] 0.5049  1.310612
##  [67,] 0.4952  0.285145
##  [68,] 0.4956  0.371470
##  [69,] 0.5017  1.547728
##  [70,] 0.4959  1.449926
##  [71,] 0.5034  0.662610
##  [72,] 0.4973  1.834912
##  [73,] 0.5001  1.062146
##  [74,] 0.4992  0.191571
##  [75,] 0.4966  0.226955
##  [76,] 0.4991 -0.588816
##  [77,] 0.5007 -0.178484
##  [78,] 0.5017  1.635564
##  [79,] 0.4967 -0.076373
##  [80,] 0.5011  1.749393
##  [81,] 0.5029  0.622376
##  [82,] 0.5047  2.046417
##  [83,] 0.4986 -0.698481
##  [84,] 0.5004  1.272797
##  [85,] 0.5016  2.004642
##  [86,] 0.5025  0.955029
##  [87,] 0.5022  0.696945
##  [88,] 0.4956  0.305274
##  [89,] 0.4999 -0.489057
##  [90,] 0.5044 -0.294516
##  [91,] 0.5018 -1.181767
##  [92,] 0.5001  0.615415
##  [93,] 0.5024  1.459453
##  [94,] 0.4973  1.296910
##  [95,] 0.5020  0.727207
##  [96,] 0.4966  0.539773
##  [97,] 0.5016  0.571736
##  [98,] 0.4986 -0.562355
##  [99,] 0.5038 -0.562179
## [100,] 0.4952 -0.385598
## [101,] 0.4985 -0.504727
## [102,] 0.4991  1.611966
## [103,] 0.5012  1.178127
## [104,] 0.4992  0.408318
## [105,] 0.4955  1.578451
## [106,] 0.5046 -0.168980
## [107,] 0.5022  0.457926
## [108,] 0.5024  0.949601
## [109,] 0.4958  0.995425
## [110,] 0.5049  0.516885
## [111,] 0.5049  1.169376
## [112,] 0.5048 -0.914484
## [113,] 0.5024  0.171720
## [114,] 0.5024  0.659154
## [115,] 0.5037 -0.156677
## [116,] 0.4995  0.864619
## [117,] 0.5007  1.070076
## [118,] 0.4957  1.404653
## [119,] 0.4999  0.836892
## [120,] 0.5010  1.138925
## [121,] 0.4981  0.480606
## [122,] 0.4992 -0.050260
## [123,] 0.4971  0.342233
## [124,] 0.5010  0.159401
## [125,] 0.5011 -0.121083
## [126,] 0.4996  0.194060
## [127,] 0.5029  0.328759
## [128,] 0.4968  0.290576
## [129,] 0.5013  0.414440
## [130,] 0.5001  0.351721
## [131,] 0.4951  0.081229
## [132,] 0.5022  1.166530
## [133,] 0.4969  1.132236
## [134,] 0.4973 -0.265045
## [135,] 0.5035  0.445896
## [136,] 0.4954 -0.173298
## [137,] 0.4999 -0.249238
## [138,] 0.4965 -0.194217
## [139,] 0.5048  0.015319
## [140,] 0.5003  1.194147
## [141,] 0.5018  0.412318
## [142,] 0.5019  1.979988
## [143,] 0.4987  1.015300
## [144,] 0.4983 -0.292398
## [145,] 0.5023  0.822679
## [146,] 0.4978  1.193476
## [147,] 0.4981 -0.159931
## [148,] 0.4994  0.227656
## [149,] 0.5038  0.874623
## [150,] 0.5030  0.457989
## [151,] 0.5022  0.054110
## [152,] 0.5023  0.004729
## [153,] 0.4997 -0.045211
## [154,] 0.5017 -0.673304
## [155,] 0.4964  0.127056
## [156,] 0.4969  0.808623
## [157,] 0.4965  0.638774
## [158,] 0.4985  1.660579
## [159,] 0.4997  0.625845
## [160,] 0.5047  0.702918
## [161,] 0.5031  0.464154
## [162,] 0.4968  0.061944
## [163,] 0.5028  1.943311
## [164,] 0.5016  1.749757
## [165,] 0.4954  0.679065
## [166,] 0.5047 -0.162072
## [167,] 0.4964  1.002970
## [168,] 0.5034  0.440952
## [169,] 0.5021  0.495395
## [170,] 0.5008 -0.342009
## [171,] 0.5017  0.270296
## [172,] 0.4959  1.675694

# We can now extract a sample of a random variable from this distribution:
sample(Y1,1,replace=FALSE)

## [1] 0.4985

mean(Y1) # Calculating the mean of the Y2|Y2=0.5 sample distribution.

## [1] 0.4669

var(Y1) # The varians would be smaller if we sampled from a smaller interval.

##            [,1]       [,2]
## [1,]  7.811e-06 -0.0001459
## [2,] -1.459e-04  0.5635288

hist(Y,breaks=100,prob=TRUE,col=5,main='Histogram',las=1)

plot of chunk unnamed-chunk-1

hist(Y1,breaks=100,prob=TRUE,col=5,main='Histogram',las=1)

plot of chunk unnamed-chunk-1

##########
rm(list=ls())
# Day 4
# Exercises step 1-6
# Step 1
# Simulate a Markov chain
# We use a method to find out if the value is mostly 1 or 0, like if we had a coin.
# because S is given to either 1 or 0.
x1<-rep(NA,1)
x2<-rep(NA,2)
N=1000
# The proberbility that we get for 1 after a 0 is p1=1/sqrt(2)
p1<-1/sqrt(2)
# The proberbility that we get fr 1 after a 1 is 1/pi
p2<-1/pi
# we don't need to look at how the posibility is for 0 (the other side) because it's the same.
# We create a loop to collect the data to from the binomial distribution.
for(i in 1:N) 
{
  x1[i]<-rbinom(1,1,p1)
  x2[i]<-rbinom(1,1,p2)
}
# Insert a histogram that can show the probability, det ses at der er flest 0'er og at det samlede antal er de 1000 = N
# It shows that there are a 70 % and 30 % destribution.  
hist(x2,prob=T,col=3, main="Markov chain",xlab='s={1,0}',ylab='Proberbility')

plot of chunk unnamed-chunk-1

############
# Step 2
# Simulate a Markov chain with the Metropolis algorithm with the 
# Cauchy distribution C(0;1) as a target distribution. 
# Use e.g. a normal pdf with 0 mean and a unit variance as a proposal q.
rm(list=ls())
n=1000
x<-rnorm(n,mean=0,sd=1)
f=function(x)
  {1/((pi)*(1+x^2))}
q=function(x)
  {1/(sqrt(s*n))*exp(-x^2/2)}

# We make a loop
for (i in 1:n-1)
  {
y<-rnorm(n=1, mean=0,sd=1)
# Compute the ratio
R <- (f(y) * dnorm(x[i],m=0,sd=1)) / (f(x[i]) * dnorm(y,m=0,sd=1))   
# Compute
p <- min(1,R)
# Defines acceptense with the p and if it is not accepted then gets X[i+1] with ifelse-function.
accept<-rbinom(n=1,size=1,prob=p)
x[i+1] <-ifelse(accept == 1,y,x[i])}
# Make histogram and curve to show the result
hist(x,prob=T,col=3, main="Markov chain with the Metropolis algorithm",xlab='C={0,1}',ylab='Proberbility')
curve(dcauchy(x),add=TRUE, col=2)

plot of chunk unnamed-chunk-1

acf(x)

plot of chunk unnamed-chunk-1

# The acf-graph looks fine because it's with no deviation.

##########
# Step 3
# Same as previous exercise with a Metropolis-Hastings algorithm with Gaussian proposal.
# How does the simulated Markov chain depend on the variance of the proposal distribution?
remove(list=ls())
# Define vector for storage
n <- 1000
x<-rnorm(n,mean=0,sd=1) # Making a random normal distribution
x[1] <- runif(n=1,min=0,max=1) 
sigma <- 1
# Iterate random update (making a loop)
for(i in 1:(n-1))
{
# Propose candidate value
  Y <- rnorm(n=1,mean=x[i],sd=sigma) 
# Compute Metropolis ratio
  R <- (dcauchy(Y) * dnorm(x[i],mean=Y,sd=sigma)) / (dcauchy(x[i]) * dnorm(Y,mean=x[i],sd=sigma))
# Compute acceptance probability
  p <- min(1,R)
# We toss an unfair coin to decide if candidate is accepted
  accept <- rbinom(n=1,size=1,prob=p)
  x[i+1] <- ifelse(accept==1, Y, x[i])
    }
# Using mfrow to make a set or Query Graphical Parameters
par(mfrow=c(1,2))
# Makes an histogram and curve to show the results
hist(x,prob=TRUE,breaks=seq(min(x)-0.1,max(x)+0.1,0.1))
curve(dcauchy(x),add=TRUE, col=6)
h <- seq(-1,1,0.001)
lines(h,dcauchy(h),col=10,lty=2,lwd=2)
acf(x)

plot of chunk unnamed-chunk-1

############
# Step 4
# Simulate a sample from a Beta B(1/2 , 1/2) with the rejection algorithm 
# (using e.g. the uniform distribution as majorating density). 
# Then simulate the same B(1/2 , 1/2) with the MH algorithm.
remove(list=ls())
# Using the rejection algorithm
n=1000
rejection.beta <- function(n,a,b)
  {
  y <- numeric()
  # making a loop
  for (i in 1:n) {
    repeat{
      x <- runif(1) 
      if (runif(1)<(x*(a+b-2)/(a-1))^(a-1)*((1-x)*(a+b-2)/(b-1))^(b-1))
      {y[i] <- x; break}
    }
  }
  y
}

r.beta1 <- rejection.beta(10000,1/2,1/2)
r.beta2 <- rbeta(10000,1/2,1/2)
mean(r.beta1)

## [1] 0.5031

mean(r.beta2)

## [1] 0.4985

sd(r.beta1)

## [1] 0.289

sd(r.beta2)

## [1] 0.3568

# making a histogram med a false frequence (all points) rejektet beta 1
hist(r.beta1,freq=FALSE)
# making a line with the beta information
lines(0:1000/1000,dbeta(0:1000/1000,1/2,1/2))
# making a histogram med a false frequence rejektet beta 2 
hist(r.beta2,freq=FALSE, col=13)
lines(0:1000/1000,dbeta(0:1000/1000,1/2,1/2), col=2)

plot of chunk unnamed-chunk-1

acf(r.beta2)

#########
# Step 5
# In the three examples above, estimate the auto-correlation functions
# of the simulated process (use the R function acf).
# DONE - see answer above.
# acf computes  (and by default plots) estimates of the autocovariance or autocorrelation function.

###########
# Day 5
#Excercise 1.1
rm(list=ls())
# Consider the function h(x ) = [cos (50x ) + sin(20x )]^2
#Plot this function on [0,1]
x=seq(from=0,to=1,by=0.01) # Defining the interval [0,1] with 100 points i.e. 0.01 interval.
h=function(x){(cos(50*x)+sin(20*x))^2} # Defining the function.
plot(h,type='l',main='h(x)',las=1,col=2) # Plot

plot of chunk unnamed-chunk-1

########
# Exercise 1.2
# Now we want to write an algorithm to compute the integral of h(x) with stocastic simulations.
# We consider sampling from a uniform distribution.
y=runif(100, min=0, max=1) # The stochastic uniform distribution function.
h1=(cos(50*y)+sin(20*y))^2
mean(h1)# The function

## [1] 0.9671

plot(h1,type='l',main='h1(y)',las=1,col=2);abline(h=mean(h1),col=4,lty=2)

#########
# Exercise 1.3
# Compare the results obtained in 2 mean(h1), with the R function integrate()
integrate(h,lower=0,upper=1) # Computing the integral of the function h.

## 0.9652 with absolute error < 1.9e-10

# We can see that this is very close to the mean of the simulated sample h1.

plot of chunk unnamed-chunk-1