# Excercise report by Jacob Mars s113812 and Sissel Trap S112195.

# Day 1
# Excercise 1

#############
# Step 1
# Open the R studio.
# Check
# Step 2
# Type some expamples, matrices
x = matrix(nrow=3,nc=4,data=1:12)
# Step 3
# Save the command as a file with R. extension
# Check
# Step 4
# Install the knitr by using command:
# install.packages('knitr')
# Step 5
# Execute commands, use various buttons. Check
# Step 6
# Check the content of memory and recorded data by using funktion
ls()

## [1] "x"

# Step 7
# Removing all the objects by executing command
remove(list=ls())
# Step 8
# Save the session as a html notebook.
# Looks very nice indeed!
# Link : file:///C:/Users/Mars/Dropbox/Stokastisk%20simulation%2002443/Lektion1/Excercise1.html

###############
#Excercise 2
# Step 1
# Vector x is defined as: 
x=1:10
# Step 2
# Vector y is defined as the first 5 values of x
y=x[1:5]
# Step 3
# Vector z is defined as every second value of x
# Here the function seq(from=..,to=..,by=..) is being used, with from and to which defines the start and the end on vector x.
z=seq(from=x[1],x[10],by=2)
# Step 4
# Make the matrix x
x = matrix(nr=4,nc=5,data=1:20)
# 4 rows, 5 columns with 20 values (4*5=20), nrow(x),ncol(x), dim(x)
# Then we add 10 to row 1
x[1,]=x[1,]+10
# Then we add 20 to the column 3
x[,3]=x[,3]+20
# Step 5
# Defines y as the "transposed" matrix of x
y=t(x)
# Step 6
# Defines z as the produkt of the 2 matrices. Calculate x and y (%*%) and not by *.
z=x%*%y
# Step 7
# Read the help of read.table
# This function reads the file in table format, and creates a dataframe from it
# Step 8
# Create dataframe from the internetfile, using the functions, header, na.strings and row.names
w=read.table(file="http://www2.imm.dtu.dk/courses/02443/slides2013/data/data_ex2.txt",header=TRUE,na.strings="-999")

## Warning: cannot open: HTTP status was '404 Not Found'

## Error: kan ikke åbne forbindelsen

# There is an error because the file has been removed!!!
# row.names function defines the name of the rows
# na.strings creates a vector of strings, which are to be intepreted as NA value (-999)
# Header function shows if the file cantains names of the variables af is alway false or true
# Checking if w is defines as a dataframe
is.data.frame(w)

## Error: object 'w' not found

# TRUE . Yes, its a dataframe
# Step 9
# Correcting the name in 3 column, by using the function
colnames(w)=c("Name","Math","Physics","Chemistry","Global")

## Error: object 'w' not found

# Step 10
# Compute the mean by subjects (colums)
# The value NA can't be computed, and will need to be ignored when calculating the mean
# The same goes for column 5, which contains letters and needs to be numeric.
# Therefor, we make a new matrix, w1, excluding the names and letters.
w1=w[1:4,2:4]

## Error: object 'w' not found

# Now we can compute the mean of the columns and the rows. 
# The command na.rm=TRUE, indicates, that if there is a NA-value, it should be neglected from the computation
# Compute the mean by subjects (columns)
colMeans(w1,na.rm=TRUE)

## Error: object 'w1' not found

# Compute the mean by studens (rows)
rowMeans(w1,na.rm=TRUE)

## Error: object 'w1' not found

# Step 11
# Create a vector of logicals with the lenght of 4 containing for the grades > 10
# The physicsgrades are placed in the 3. column of matrix w. 
# We therefor make a new vector (t), which containts logical expressions i.e TRUE or FALSE for values > 10.
t=w[,3]>10

## Error: object 'w' not found

# Step 12
# Create a 4x3 metrix of logials containing TRUE for grades > 10.
# If we check for values > 10 in the original matrix w, we recieve a warning that it contains non logical expressions
# This is because letters (names) cannot be compared to numbers. We can however factor or assign number to letters if needed.
# We therefor run the function on the matrix w1 (previously used), containing only column 2,3 and 4 of matrix w.
r=w1>10

## Error: object 'w1' not found

#############
# Excercise 3.1
# Create a vector containing the 100 first numbers of the Fibunacci sequence
Fibonacci <- numeric(100) # This is the first 100 numeric values of Fubinacci.
Fibonacci[1] <- Fibonacci[2] <- 1 # Setting the value of the first and the second Fubinacci number as 1.
# The subsequent elements are defined as the sum of the preceding two elements. Now we can use the for() to write a loop:
for (i in 3:100) {Fibonacci[i] <- Fibonacci[i - 2] + Fibonacci[i - 1]}
Fibonacci

##   [1] 1.000e+00 1.000e+00 2.000e+00 3.000e+00 5.000e+00 8.000e+00 1.300e+01
##   [8] 2.100e+01 3.400e+01 5.500e+01 8.900e+01 1.440e+02 2.330e+02 3.770e+02
##  [15] 6.100e+02 9.870e+02 1.597e+03 2.584e+03 4.181e+03 6.765e+03 1.095e+04
##  [22] 1.771e+04 2.866e+04 4.637e+04 7.502e+04 1.214e+05 1.964e+05 3.178e+05
##  [29] 5.142e+05 8.320e+05 1.346e+06 2.178e+06 3.525e+06 5.703e+06 9.227e+06
##  [36] 1.493e+07 2.416e+07 3.909e+07 6.325e+07 1.023e+08 1.656e+08 2.679e+08
##  [43] 4.335e+08 7.014e+08 1.135e+09 1.836e+09 2.971e+09 4.808e+09 7.779e+09
##  [50] 1.259e+10 2.037e+10 3.295e+10 5.332e+10 8.627e+10 1.396e+11 2.259e+11
##  [57] 3.654e+11 5.913e+11 9.567e+11 1.548e+12 2.505e+12 4.053e+12 6.557e+12
##  [64] 1.061e+13 1.717e+13 2.778e+13 4.495e+13 7.272e+13 1.177e+14 1.904e+14
##  [71] 3.081e+14 4.985e+14 8.065e+14 1.305e+15 2.111e+15 3.416e+15 5.528e+15
##  [78] 8.944e+15 1.447e+16 2.342e+16 3.789e+16 6.131e+16 9.919e+16 1.605e+17
##  [85] 2.597e+17 4.202e+17 6.799e+17 1.100e+18 1.780e+18 2.880e+18 4.660e+18
##  [92] 7.540e+18 1.220e+19 1.974e+19 3.194e+19 5.168e+19 8.362e+19 1.353e+20
##  [99] 2.189e+20 3.542e+20

#Excercise 3.2
remove(list=ls())
# Write a function returning the real roots of a second order polynomial:
# f=a*x^2+b*x+c where as the constants are stored as:
a=10; b=2; c=-5
# The roots of a second order polynomial, can be determined by the quadratic function:
# a*x^2+b*x+c=0
# The quadratic equation is a second-order polynomial equation, so it has two solutions (x1,x2) which can be found:
# x1=(-b-sqrt(b^2-4*a*c))/2*a
# x2=(-b+sqrt(b^2-4*a*c))/2*a
# However, we can use the function "uniroot()" to find the roots of the function.

f <- function (x)
{ return ( a*x^2+b*x +c)}

plot(f)

plot of chunk unnamed-chunk-1

# Find the root , and a lot more information .
# We can now find the roots, using the uniroot()-function, defining an interval from 0 to 100.
TheRoot <- uniroot(f, c(0 , 100));
TheRoot $ root # Returns the numerical value of the second order polynomial.

## [1] 0.6141

#############
# Excercise 4
# 4.1 Load the file as a data.frame
Stock=read.table("http://www2.imm.dtu.dk/courses/02443/projects/data/Eurostoxx50.txt")
# Now the data is stored as "Stock".
# 4.2
#Plot the variation of the prices as a function of time on the same plotting window:
plot(Stock,main='Stock prices') # This will result in a matrix scatterplot.

plot of chunk unnamed-chunk-1

# The coloumns 2,3 and 4 and different stock prices as a function of time. 
Stock1=Stock[,2]
Stock2=Stock[,3]
Stock3=Stock[,4]
# Now we can plot the 3 different stocks:
plot(Stock1,main='Stock Prices', xlab='Time', ylab='Price',las=1,cex=0.5,col=2,xlim=c(0,1500),ylim=c(0,50))
lines(Stock2,main='Stock Prices', xlab='Time', ylab='Price',las=1,cex=0.5,col=3)
lines(Stock3,main='Stock Prices', xlab='Time', ylab='Price',las=1,cex=0.5,col=4)
# Now we will add the legend
labels <- c("Stock1","Stock2","Stock3")
legend("topright",inset=0.05,title="Stock Prices",labels, lwd=2, col=c("red","green","blue"))

plot of chunk unnamed-chunk-1

##############
#Excercise in pseudo RNG simulation
remove(list=ls())
# The congruence method
a=16807 ; b=0 ; M=2147483647
n = 1000 
# Number of iterations (simulates 1000 realisations on [0,1]) - the higher number n, the more precise. 
# The number of times it go into M
x = numeric(length=n)
x[1] = 3
for(i in 2:n)
{  x[i] = (a*x[i-1] + b) %% M }
x = x/M
# Make somc checks of ramdomness
# 1) Insert x into a histogram and insert a pink line to show the mean
hist(x,prob=TRUE)
u = seq(0,1,0.01)
lines(u,dunif(u),col=6)

plot of chunk unnamed-chunk-1

# 2) Illustrade graphically the distribution by a scatter of (Xk, Xk+2)
# Scatter plots are appropriate for examining the relationship between 2 numeric variables
# We displace displace the function (x) by 3, to check for any patterns
plot(x[1:998],x[3:1000],main='Scatterplot',xlab='x',ylab='X',las=1,cex=0.5,col=2)

plot of chunk unnamed-chunk-1

# 3) Plot the auto-correlation function
# This shows the similarity betweem observations as a function of time lag between them. 
acf(x,main='Auto-correlation Plot',las=1, ylim=c(-0.1,0.1))

plot of chunk unnamed-chunk-1

# We check the plot for correlation. Concluding that the correlation is very small, about 0.02.
# 4) Statistical test of the hypothesis that the simulated values are i.i.d. realisations of a U[0,1] distribution.
# Test if RNG pass the KS test
# This test is non-parametric and distribution free, and test if 2 datasets differ significantly.
# D is defined as the maximum vertical deviation between the two curves.
# The P-value measures how they differ, and then if the data conflicts with H0. Reject the null-hypothetis if small p-value.
ks.test(x,"punif",alternative="two.sided")

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  x
## D = 0.043, p-value = 0.04975
## alternative hypothesis: two-sided

# This test results in a very small p-value, which leads to a rejection of H0.
# We therefore try and run more simulations (higher n-value), in order to have a better basis for decision
n1=30000
x = numeric(length=n1)
x[1] = 3
for(i in 2:n1)
{  x[i] = (a*x[i-1] + b) %% M }
x = x/M
ks.test(x,"punif",alternative="two.sided")

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  x
## D = 0.0043, p-value = 0.6487
## alternative hypothesis: two-sided

# We can now conclude, that this sample comes from a uniform distribution
# Repeat different steps with different values of the parametres a, b and M
# b is a constant used in the equation  x[i] = (a*x[i-1] + b) %% M.
# By entering a value b=12345, results in a p-value=0.91
# a is the the multiplier in the equation. Changing a, can result in major erros running in functions and plots.
# Modulus, as previously mentioned, needs to be large enough to avoid discretization issues, but no larger than the limit.

##################
remove(list=ls())
# Day 2 - Excercise 1
# Exercise 1.1
# Simulate a sample from a Cauchy(0,1) distribution with the inversion method, given density f and fcdf.
n=1000
f=function(x)
{1/(pi)*(1+x^2)}
# cdf 
fcdf=function(x)
{1/2+1/(pi)*arctan(x)}
# The inverse of the cdf is found by standard mathematical procedure.
Fnif=function(x)
{tan(pi*(x-1/2))}
x<-runif(n=1000,0,1)
# Simulate 1000 numbers from the distribution and store them as the function:
y= Fnif(x)
# Plot the empirical histogram of the cauchy distributet values
hist(y[abs(y)<20],breaks=100,prob=TRUE,col=5,main='Histogram',las=1)
# We can insert the distribution as points, to see that pattern centerede around the middle.
points(y,rep(0,n),col=4)
# Compare to the target density, we see some kind of overlab.
# Picking a interval between -20 and 20, because it seemed fitting (fewer points futher out).
z=seq(-20,20,0.1) # Making a sequence in the interval [-20:20] with 0.1 interval.
zy=dcauchy(z,0,1) # Density distribution from the given Cauchy parametres and interval.
points(z,zy,col=10)

plot of chunk unnamed-chunk-1

############
rm(list=ls())

# Exercise 1.2
# Uniform method
# Simulate a sample from a Beta(2,5) distribution using the rejection method with a
# uniform distribution as auxiliary distribution.
# With the rejection method you find a function which is "bigger" than the function  that you want to find.
# q is the vektors of quantiles.
N=10000
x=runif(1000) 
u=runif(1000)
# We want to define a constant c, which is set as low as possible, in order to not get too many "unnecessary" points.
c=3
# We are now going to find a random point on the x- and y-axsis.
# The rescaled auxcility density is defined as S.
s=c*u
# Now we want to check if the point is within the graph, i.e if c*g(x)<=f(x).
acc = s<=dbeta(x,2,5)
plot(x[acc],s[acc],col=1) # Showing black dots within the limits of the function.
points(x[!acc],s[!acc],col=2) # Show red dots above the limits of the function.

plot of chunk unnamed-chunk-1

hist(x[acc],breaks=100,prob=TRUE,col=5,main="Uniform Method",xlab='X',ylab='Y',las=1) # Histogram plot
lines(seq(0,0.7,0.01),dbeta(seq(0,0.7,0.01),2,5),col=6,'l',las=1) # Adding a line to the plot.

plot of chunk unnamed-chunk-1

##############
rm(list=ls())

# Exercise 1.3 
# Simulate a sample from an N(0,1) distribution with the rejection method using
# the double exponential distribution g(x) = (alfa)/2exp(-alfa)x) as auxiliary variable.
n=1000 # Number of observations
# The X-axis is again uniform and the y-axis is expotential. 
z=runif(n,0,1) # Generating n random deviates in the interval [0,1]
# Selecting the middel of the interval (0,5) and multiplice the half with -1 and the other half with 1, thereby achieving a double exponential distribution.
z[z<0.5]<- -1
z[z>0.5]<- 1
alpha = 1
# Defines x and y
x <- z*rexp(n,1)
y <- runif(n,0,(alpha)/2*exp(-alpha*abs(x))*2)
acc <- y < dnorm(x,0,1)
plot(x[acc],y[acc],col=1,xlim=c(-6,6),ylim=c(0,1))
points(x[!acc],y[!acc],col=2)

plot of chunk unnamed-chunk-1

#############
rm(list=ls())
# Exercise 1.4
# One throws a needle of length l at random on a parquet floor with planck width l
# Study by simulation this experiment and estimate the probability of the needle to intersect a line between two planks?
l=1 # Lenght of the needle.
n=10000 # Number of throws.
# We have 2 variables; A which is the angle of the needle, and x which is the initial position of the needle.
A=runif(min=0, max=(pi),n=10000) # angle of the needle from [0:pi] because of the symmetri.
x=runif(min=0,max=l,n=10000) # Interval of the plank, which the needle can land on.
y=(sin(A)*l/2) # Length of the needle on the x-axis.
# Now we have the functions and critera for a model
# We can now make a loop to test every possible outcome, and count how many times the needel tuch the line.
count=0 # Setting our initial count
for(i in 1:n){if(y[i]>=x[i]){count=count+1}}
# {} defines the region of the loop for each y >= x (needle crosses or touches the line)
count #Display the number of times the needle intersect the line between the planks.

## [1] 3197

# We can now find the probability by taking the total amount of simulations, divided by the times the needles intersected (count)
n/count

## [1] 3.128

# We can see that the needle intersect the line on every n/count try.
# This is very close to pi, assuming that if we could increase the number of simulations, the probability would come closer to the value of pi.

##########
rm(list=ls())

# Exercise II - 5
# You want to sell your car. You receive a first offer at a price of 12 (say in K Euro) Since it
# is the first offer, you decide to reject it and wait a little bit to \smell" the market.
# You reject offers and wait until you get the first offer strictly larger than 12.
# Study by simulation the waiting time (counted in number of offers untill the sell). 
# You can for instance estimate the expectation of the waiting time. Assume that the Xi
#s are independant and identically distributed Poisson(10)
#. Consider the solution conditionally on X1 = 12
# then unconditionally (i.e. in
# average over all possible X1 values under the assumption that the Xi's are i.i.d
# Poisson(10)). The use of rpois is allowed in this exercise.
# Estimate the expectation of waiting time.
# We use the rpoist() function to simulate n independant Poisson random variables
n=1000
v<-rpois(n,10) # Generates n random numbers from the Poisson distribution.
# Can locate the numbers less than or equal to 12, and label them as false.
v[1:1000]<=12

##    [1]  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
##   [12]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE
##   [23] FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##   [34] FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE
##   [45]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##   [56]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
##   [67]  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##   [78]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE
##   [89]  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
##  [100]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [111]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [122]  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [133]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
##  [144]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE
##  [155]  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [166]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [177]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [188]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [199]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [210]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [221]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [232]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [243]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [254]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
##  [265] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE
##  [276]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
##  [287]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [298]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [309]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [320]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [331]  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE
##  [342] FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [353] FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
##  [364] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [375]  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE
##  [386]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE
##  [397] FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
##  [408]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [419] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [430]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [441]  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
##  [452]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE
##  [463]  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [474]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
##  [485]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE
##  [496]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
##  [507]  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
##  [518]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [529]  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [540]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
##  [551]  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE
##  [562]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [573]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE
##  [584]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [595]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [606] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
##  [617]  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [628] FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [639]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [650]  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [661]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
##  [672]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE
##  [683]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [694]  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
##  [705]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [716]  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [727]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [738] FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
##  [749]  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [760] FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [771]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [782]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [793]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
##  [804]  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
##  [815]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [826]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE
##  [837] FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [848]  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE
##  [859] FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [870]  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE
##  [881]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [892]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
##  [903]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE
##  [914] FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [925]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
##  [936] FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE
##  [947]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [958]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE
##  [969] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
##  [980]  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [991]  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE

# We can then call the function to locate the position of the buyer offering 12k or more:
which((v[1:1000]<=12)==FALSE)

##   [1]    3    5    9   15   18   20   23   25   27   33   34   37   40   43
##  [15]   44   49   55   57   64   68   70   79   84   88   91   93   96   98
##  [29]  107  126  127  140  141  149  152  154  157  159  170  179  183  190
##  [43]  198  203  212  242  261  264  265  271  273  281  282  300  304  332
##  [57]  335  336  337  340  341  342  345  353  354  355  357  361  364  371
##  [71]  378  379  384  385  392  395  397  399  402  403  409  415  419  440
##  [85]  443  445  447  448  457  460  464  466  482  490  491  493  494  501
##  [99]  502  510  511  516  521  525  530  534  548  549  553  554  555  559
## [113]  561  578  581  594  606  614  618  621  627  628  631  635  649  652
## [127]  654  664  668  671  677  682  696  698  699  700  706  712  717  719
## [141]  720  729  733  738  739  741  745  747  750  752  753  760  763  774
## [155]  785  801  802  806  808  809  810  818  825  828  832  835  837  841
## [169]  843  849  852  854  857  859  860  863  871  873  874  878  880  888
## [183]  898  899  912  913  914  915  917  918  921  933  936  941  943  967
## [197]  968  969  976  982  984  986  994  995  996  997 1000

# We can then use the diff-function to find number of costumers between the buyers offering 12k or more.
diff(which((v<=12)==FALSE))-1

##   [1]  1  3  5  2  1  2  1  1  5  0  2  2  2  0  4  5  1  6  3  1  8  4  3
##  [24]  2  1  2  1  8 18  0 12  0  7  2  1  2  1 10  8  3  6  7  4  8 29 18
##  [47]  2  0  5  1  7  0 17  3 27  2  0  0  2  0  0  2  7  0  0  1  3  2  6
##  [70]  6  0  4  0  6  2  1  1  2  0  5  5  3 20  2  1  1  0  8  2  3  1 15
##  [93]  7  0  1  0  6  0  7  0  4  4  3  4  3 13  0  3  0  0  3  1 16  2 12
## [116] 11  7  3  2  5  0  2  3 13  2  1  9  3  2  5  4 13  1  0  0  5  5  4
## [139]  1  0  8  3  4  0  1  3  1  2  1  0  6  2 10 10 15  0  3  1  0  0  7
## [162]  6  2  3  2  1  3  1  5  2  1  2  1  0  2  7  1  0  3  1  7  9  0 12
## [185]  0  0  0  1  0  2 11  2  4  1 23  0  0  6  5  1  1  7  0  0  0  2

# Finally, we can now calculate the average waittime between buyers offering 12k or more, by taking the mean.
mean(diff(which((v<=12)==FALSE))-1)

## [1] 3.84

##############
rm(list=ls())
#Execise II - 6
lambda=10
n=200
# Simulate a single dataset of n=200 observations from a Poisson distribution with parametre 10.
x=rpois(n,lambda)
# We can estimae the theoretical median from the sample by using the function:
y=median(x,na.rm=FALSE);y

## [1] 10

# Bootstrapping is a method, which lets us compute estimated confidens interval and standard errors.
# We are now able to resample a given set of data, x, by using the sample function.
sample(x,n,replace=FALSE)

##   [1]  8 14  9  5 10  9 11  8  7 13 11  5  8 11 12 13 11 12  3 14  8 10 11
##  [24]  7  9  5 15  9  8  8  7 11 11  9  5  5  9  8 11  7  9 14 11  8  8  7
##  [47] 11 12  8 11  5  9 10  9  8  6 14  5 11 13  8 11 13 16 12 11  7 13 14
##  [70]  5  6 14  6 10  9 13 14 15 10  7 10  8  6 15 11  9  6 10 10  4 10  9
##  [93] 10 15 10  5  7  8  9  8 10 10 13 13 10  6 14  7  7 17 11 10 10  9  8
## [116]  5 12 10  8 12 13  7 10  9  6 10 11 21  8 16 12 14 11 13  3 12 13  8
## [139]  9 11  8 12 11 14  5 12  8 11  9 11 11  7  6 12  9  7  9  8  8  8  8
## [162] 14 10 12 11 13 18  9  6 12 10  9  9 10  8  7 19 14  7  4  8 12  9  7
## [185] 15 16  9 14 14  8 15  7  9  8  5  9 13 13  9  8

# x is the a vector containing the data. n is the size of the population being resampled. Replace determines that the sample drawn, will not be replaced.
# We can now obtain some bootstrap samples, from the orgininal poisson distributed sample.
resamples <- lapply(1:200, function(i)
  sample(x, replace = T))
# We have made 200 resamples from the original random poisson distribution. Let's display the first:
resamples[1]

## [[1]]
##   [1]  6 10  9 15 15  7  3  7 11 11 10  9 11  8 11 11  8 11 11  6 10 11  9
##  [24] 13  9  7 10  9  8  4  7 14  9  9 13  9  4 15 14  8  8  8  4 21  7  9
##  [47]  6  8 11 15 10 13  5  8 17  3  8 11  7  7  8 13  5  5 17 13  7  7  9
##  [70]  8  9  9  8 11  7 15  8 10 17  9  9  8 10 13 11 15 11  8  5 12  8  9
##  [93]  6 10  8  8  6 12  9 11 10 10 14  9  9  3 10  4  8 19  8  8  8 11 13
## [116]  7  9  9 19  7 13  5  8 12  8  7  8  8 12  7  6  7  7 21  8  6 13  6
## [139]  5 10 13  5 11  6  8  9 15 10 15 13 21 10  7 10 14  7 10  7 15 10 11
## [162] 16 13  9 14 14  9  9  9  9  9  8  9  6 11  5  9  3  7  8 11  9 10 10
## [185] 11  8  7 13  5 10  9 11  8  5 11  7 14  9 13 13

# We can now calculate the median for each bootstrap sample, a total of 200 samples:
r.median <- sapply(resamples, median)
r.median

##   [1]  9.0  9.0  9.0  9.5  9.0  9.0 10.0  9.0 10.0  9.0 10.0  9.0 10.0  9.5
##  [15]  9.0 10.0  9.5 10.0  9.5 10.0 10.0  9.0 10.0 10.0  9.0  9.0 10.0  9.0
##  [29] 10.0 10.0  9.0  9.0  9.5  9.0 10.0  9.0  9.0  9.0 10.0 10.0 10.0 10.0
##  [43] 10.0  9.0 10.0  9.0 10.0  9.0  9.0  9.0 10.0 10.0  9.0 10.0 10.0  9.0
##  [57] 10.0 10.0  9.0  9.0  9.0  9.0  9.5 10.0  9.0  9.5  9.0  9.0 10.0  9.0
##  [71] 10.0 10.0 10.0  9.0  9.5  9.0  9.0  9.0 10.0 10.0  9.5 10.0  9.0 10.0
##  [85]  9.0  9.0  9.0  9.0  9.0 10.0  9.0 10.0  9.0 10.0 10.0 10.0 10.0  9.0
##  [99]  9.0  9.0  9.0  9.0  9.0  9.0 10.0 10.0 10.0 10.0 10.0  9.0 10.0  9.0
## [113] 10.0 10.0 10.0 10.0 10.0  9.5 10.0 10.0  9.0  9.0 10.0  9.5 10.0 10.0
## [127] 10.0 10.0  9.0 10.0 10.0 10.0 10.0 10.0  9.0  9.0  9.5 10.0 10.0 10.0
## [141]  9.5 10.0 10.0  9.0 10.0 10.0 10.0 10.0 10.0 10.0  9.0 10.0 10.0 10.0
## [155] 10.0  9.0 10.0  9.0  9.0 10.0  9.5 10.0 10.0 10.0  9.0 10.0 10.0  9.0
## [169]  9.0 10.0 10.0  9.0  9.0  9.0  9.0 10.0  9.0  9.5 10.0 10.0 10.0 10.0
## [183] 10.0 10.0 10.0 10.0 10.0  9.0 10.0 10.0 10.0  9.0  9.0  9.0 10.0 10.0
## [197] 10.0 10.0  9.0  9.0

mean(r.median) # Showing the mean of the medians of the bootstrap samples

## [1] 9.568

# We can now calculate the varians and standard deviation of the distributions of means
sqrt(var(r.median))

## [1] 0.4773

# Let's display the distribution of medians by a histogram to get a good overview
hist(r.median,main='Histogram of median',xlab='Median',ylab='Frequency',las=1,col=2)

plot of chunk unnamed-chunk-1

# We can conclude that the standard deviation is very small. We can also see in the histogram, 

###########
rm(list=ls())
# Exercise II - 7
#Simulate a sample of size 50000 from a bivariate centred, standardized Gaussian vector
#with correlation coefficient p=0.7. Plot this sample. Extract a sample of a random
#variable approximately distributed as Y2jY1 = :5. Estimate its mean and variance. Plot its
#empirical histogram and compare it to a normal density with same parameters as the one simulated above
# First we are going to define the target density of the distribution
n=50000
sd=1
rho=0.7
Sigma=matrix(nr=2,nc=2,data=c(sd,rho,rho,sd))
Sigma

##      [,1] [,2]
## [1,]  1.0  0.7
## [2,]  0.7  1.0

# Next step is to use Choleski factorisation, which transforms it into a lower triangular matrix (density form) and its conjugate transpose.
U = chol(Sigma) ; L = t(U)
L

##      [,1]   [,2]
## [1,]  1.0 0.0000
## [2,]  0.7 0.7141

##      [,1]   [,2]
## [1,]    1 0.7000
## [2,]    0 0.7141

L %*% U ## Checking if the produkt is equal to Sigma

##      [,1] [,2]
## [1,]  1.0  0.7
## [2,]  0.7  1.0

# We can now try to simulate 2 sample sizes from a normal random distribution:
x1 = rnorm(n) ; x2 = rnorm(n)
X = rbind(x1,x2)
Y = L %*% X
Y = t(Y)
# Let's try and plot the samples to get an overview.
plot(Y,asp=1,main='Sample plot',las=1,cex=1,col=2)
library(MASS)
points(x1,x2,asp=1,cex=1,col=3)
contour(kde2d(x1,x2),asp=TRUE,theta=20,phi=30,add=TRUE)

plot of chunk unnamed-chunk-1

# Extract a sample of random variable approximately distributed as Y2|Y2=0.5.
plot(Y,asp=1,main='Sample plot',las=1,cex=1,col=2)
abline(v=.5,col=1,lty=1)

plot of chunk unnamed-chunk-1

# The sample needs to be distributed from the line Y2|Y2=0.5.
dim(Y) # Displays the dimension of the matrix nrow=50.000, ncol=2.

## [1] 50000     2

# We wish to search for the values of Y2 in coloumn 2, equals to Y1=0.5.
# We need to construct an interval, since there are no values excactly equal 0.5:
Y1<-Y[Y[,1]>0.495 & Y[,1]<0.505,]
Y1 # So we have the number if points equal approximately distributed as Y2|Y2=0.5.

##          [,1]      [,2]
##   [1,] 0.4985  0.829266
##   [2,] 0.5042  0.019415
##   [3,] 0.5042 -0.242850
##   [4,] 0.5001  0.226590
##   [5,] 0.4971  1.795938
##   [6,] 0.4987  1.313292
##   [7,] 0.5045 -0.082372
##   [8,] 0.4976 -0.128580
##   [9,] 0.4985  0.492945
##  [10,] 0.5015 -0.140528
##  [11,] 0.4995 -0.559081
##  [12,] 0.5027  0.577112
##  [13,] 0.5005  0.734403
##  [14,] 0.5032  0.042021
##  [15,] 0.4964  1.199742
##  [16,] 0.4982 -0.002343
##  [17,] 0.5023 -0.046294
##  [18,] 0.4964  0.904195
##  [19,] 0.5038 -0.986600
##  [20,] 0.5022  0.497566
##  [21,] 0.4956  0.063014
##  [22,] 0.4956  1.074809
##  [23,] 0.5035 -1.056473
##  [24,] 0.5002  0.558216
##  [25,] 0.5011  0.140927
##  [26,] 0.5045 -0.426411
##  [27,] 0.4990 -0.031250
##  [28,] 0.5017 -0.330763
##  [29,] 0.5035  0.508255
##  [30,] 0.4971 -0.802860
##  [31,] 0.4979 -0.308101
##  [32,] 0.5048  0.908391
##  [33,] 0.5040 -0.212330
##  [34,] 0.4972  0.529411
##  [35,] 0.5001  0.473288
##  [36,] 0.5020 -0.517218
##  [37,] 0.5028  1.013873
##  [38,] 0.5012  1.033856
##  [39,] 0.4988  0.071873
##  [40,] 0.4961 -0.628664
##  [41,] 0.4990  0.516624
##  [42,] 0.4987  0.142610
##  [43,] 0.4957  1.298239
##  [44,] 0.4975  1.418427
##  [45,] 0.4983  0.432915
##  [46,] 0.5019 -0.689283
##  [47,] 0.4999  0.290450
##  [48,] 0.4985  1.832195
##  [49,] 0.5009 -1.215578
##  [50,] 0.4982  0.494001
##  [51,] 0.5033  1.678251
##  [52,] 0.4957  1.510081
##  [53,] 0.4981 -0.090491
##  [54,] 0.5011 -0.219850
##  [55,] 0.5003  0.762345
##  [56,] 0.5033 -0.178789
##  [57,] 0.4959  0.767021
##  [58,] 0.5023  0.895312
##  [59,] 0.5037  0.102576
##  [60,] 0.4956  0.575111
##  [61,] 0.5041  0.156826
##  [62,] 0.4985  0.189039
##  [63,] 0.4985  0.245255
##  [64,] 0.5011  0.621015
##  [65,] 0.5028 -0.825683
##  [66,] 0.5015 -0.246627
##  [67,] 0.4973  1.625610
##  [68,] 0.5026  0.397433
##  [69,] 0.4993 -0.665298
##  [70,] 0.4961 -0.272861
##  [71,] 0.4987 -0.111202
##  [72,] 0.5017  0.158186
##  [73,] 0.5031  0.326494
##  [74,] 0.4965 -0.480791
##  [75,] 0.5023 -0.357251
##  [76,] 0.5013 -0.176681
##  [77,] 0.5000  1.030528
##  [78,] 0.4956  0.727908
##  [79,] 0.5005  0.544041
##  [80,] 0.5044  0.942811
##  [81,] 0.4997 -0.318955
##  [82,] 0.4978  0.583843
##  [83,] 0.4992  2.246581
##  [84,] 0.4972 -0.192198
##  [85,] 0.5047 -0.168675
##  [86,] 0.5016 -1.022175
##  [87,] 0.5012  0.712302
##  [88,] 0.4966  0.031974
##  [89,] 0.5010  0.046267
##  [90,] 0.5021 -0.200522
##  [91,] 0.5006  1.209431
##  [92,] 0.5007 -0.739788
##  [93,] 0.5024  0.706869
##  [94,] 0.5050 -0.232137
##  [95,] 0.5015  0.644070
##  [96,] 0.4991  0.152565
##  [97,] 0.5023  0.250831
##  [98,] 0.5021  1.076171
##  [99,] 0.4962 -0.194631
## [100,] 0.5048 -0.167607
## [101,] 0.4997  0.432455
## [102,] 0.5030  1.759575
## [103,] 0.4955 -0.851974
## [104,] 0.4966  0.010827
## [105,] 0.4955  0.785594
## [106,] 0.5034  0.022750
## [107,] 0.5028 -0.023737
## [108,] 0.4964  0.893537
## [109,] 0.5044  0.415870
## [110,] 0.4979  1.309633
## [111,] 0.5014  0.655810
## [112,] 0.4990  0.572451
## [113,] 0.4998  0.268540
## [114,] 0.5050  0.889427
## [115,] 0.4954 -0.516190
## [116,] 0.5048  1.410140
## [117,] 0.5025  0.406740
## [118,] 0.4952  0.632928
## [119,] 0.4992  0.792577
## [120,] 0.4974 -0.197847
## [121,] 0.5025  1.001766
## [122,] 0.5014  0.411691
## [123,] 0.5039 -0.236270
## [124,] 0.5020 -0.022188
## [125,] 0.5026  0.792639
## [126,] 0.5009  1.618309
## [127,] 0.4955  0.662524
## [128,] 0.5044 -0.136648
## [129,] 0.4958  0.578041
## [130,] 0.5033 -0.054754
## [131,] 0.4986  0.842644
## [132,] 0.4972 -0.436240
## [133,] 0.5035  0.980766
## [134,] 0.5036 -0.220547
## [135,] 0.4966  0.015066
## [136,] 0.5041  1.150835
## [137,] 0.4974  0.332787
## [138,] 0.5026 -0.748405
## [139,] 0.5017 -0.600951
## [140,] 0.5021  0.055045
## [141,] 0.4985  0.987667
## [142,] 0.4984  0.728285
## [143,] 0.5005  1.135297
## [144,] 0.4999  0.147379
## [145,] 0.4954  1.580595
## [146,] 0.4996  0.109728
## [147,] 0.4978  0.113975
## [148,] 0.5024  0.264079
## [149,] 0.5034  0.310003
## [150,] 0.5050  0.577579
## [151,] 0.4966  1.656514

# We can now extract a sample of a random variable from this distribution:
sample(Y1,1,replace=FALSE)

## [1] -0.2428

mean(Y1) # Calculating the mean of the Y2|Y2=0.5 sample distribution.

## [1] 0.4168

var(Y1) # The varians would be smaller if we sampled from a smaller interval.

##            [,1]       [,2]
## [1,]  8.182e-06 -0.0003064
## [2,] -3.064e-04  0.4608198

hist(Y,breaks=100,prob=TRUE,col=5,main='Histogram',las=1)

plot of chunk unnamed-chunk-1

hist(Y1,breaks=100,prob=TRUE,col=5,main='Histogram',las=1)

plot of chunk unnamed-chunk-1

##########
rm(list=ls())
# Day 4
# Exercises step 1-6
# Step 1
# Simulate a Markov chain
# We use a method to find out if the value is mostly 1 or 0, like if we had a coin.
# because S is given to either 1 or 0.
x1<-rep(NA,1)
x2<-rep(NA,2)
N=1000
# The proberbility that we get for 1 after a 0 is p1=1/sqrt(2)
p1<-1/sqrt(2)
# The proberbility that we get fr 1 after a 1 is 1/pi
p2<-1/pi
# we don't need to look at how the posibility is for 0 (the other side) because it's the same.
# We create a loop to collect the data to from the binomial distribution.
for(i in 1:N) 
{
  x1[i]<-rbinom(1,1,p1)
  x2[i]<-rbinom(1,1,p2)
}
# Insert a histogram that can show the probability, det ses at der er flest 0'er og at det samlede antal er de 1000 = N
# It shows that there are a 70 % and 30 % destribution.  
hist(x2,prob=T,col=3, main="Markov chain",xlab='s={1,0}',ylab='Proberbility')

plot of chunk unnamed-chunk-1

############
# Step 2
# Simulate a Markov chain with the Metropolis algorithm with the 
# Cauchy distribution C(0;1) as a target distribution. 
# Use e.g. a normal pdf with 0 mean and a unit variance as a proposal q.
rm(list=ls())
n=1000
x<-rnorm(n,mean=0,sd=1)
f=function(x)
  {1/((pi)*(1+x^2))}
q=function(x)
  {1/(sqrt(s*n))*exp(-x^2/2)}

# We make a loop
for (i in 1:n-1)
  {
y<-rnorm(n=1, mean=0,sd=1)
# Compute the ratio
R <- (f(y) * dnorm(x[i],m=0,sd=1)) / (f(x[i]) * dnorm(y,m=0,sd=1))   
# Compute
p <- min(1,R)
# Defines acceptense with the p and if it is not accepted then gets X[i+1] with ifelse-function.
accept<-rbinom(n=1,size=1,prob=p)
x[i+1] <-ifelse(accept == 1,y,x[i])}
# Make histogram and curve to show the result
hist(x,prob=T,col=3, main="Markov chain with the Metropolis algorithm",xlab='C={0,1}',ylab='Proberbility')
curve(dcauchy(x),add=TRUE, col=2)

plot of chunk unnamed-chunk-1

acf(x)

plot of chunk unnamed-chunk-1

# The acf-graph looks fine because it's with no deviation.

##########
# Step 3
# Same as previous exercise with a Metropolis-Hastings algorithm with Gaussian proposal.
# How does the simulated Markov chain depend on the variance of the proposal distribution?
remove(list=ls())
# Define vector for storage
n <- 1000
x<-rnorm(n,mean=0,sd=1) # Making a random normal distribution
x[1] <- runif(n=1,min=0,max=1) 
sigma <- 1
# Iterate random update (making a loop)
for(i in 1:(n-1))
{
# Propose candidate value
  Y <- rnorm(n=1,mean=x[i],sd=sigma) 
# Compute Metropolis ratio
  R <- (dcauchy(Y) * dnorm(x[i],mean=Y,sd=sigma)) / (dcauchy(x[i]) * dnorm(Y,mean=x[i],sd=sigma))
# Compute acceptance probability
  p <- min(1,R)
# We toss an unfair coin to decide if candidate is accepted
  accept <- rbinom(n=1,size=1,prob=p)
  x[i+1] <- ifelse(accept==1, Y, x[i])
    }
# Using mfrow to make a set or Query Graphical Parameters
par(mfrow=c(1,2))
# Makes an histogram and curve to show the results
hist(x,prob=TRUE,breaks=seq(min(x)-0.1,max(x)+0.1,0.1))
curve(dcauchy(x),add=TRUE, col=6)
h <- seq(-1,1,0.001)
lines(h,dcauchy(h),col=10,lty=2,lwd=2)
acf(x)

plot of chunk unnamed-chunk-1

############
# Step 4
# Simulate a sample from a Beta B(1/2 , 1/2) with the rejection algorithm 
# (using e.g. the uniform distribution as majorating density). 
# Then simulate the same B(1/2 , 1/2) with the MH algorithm.
remove(list=ls())
# Using the rejection algorithm
n=1000
rejection.beta <- function(n,a,b)
  {
  y <- numeric()
  # making a loop
  for (i in 1:n) {
    repeat{
      x <- runif(1) 
      if (runif(1)<(x*(a+b-2)/(a-1))^(a-1)*((1-x)*(a+b-2)/(b-1))^(b-1))
      {y[i] <- x; break}
    }
  }
  y
}

r.beta1 <- rejection.beta(10000,1/2,1/2)
r.beta2 <- rbeta(10000,1/2,1/2)
mean(r.beta1)

## [1] 0.4965

mean(r.beta2)

## [1] 0.5022

sd(r.beta1)

## [1] 0.2894

sd(r.beta2)

## [1] 0.3528

# making a histogram med a false frequence (all points) rejektet beta 1
hist(r.beta1,freq=FALSE)
# making a line with the beta information
lines(0:1000/1000,dbeta(0:1000/1000,1/2,1/2))
# making a histogram med a false frequence rejektet beta 2 
hist(r.beta2,freq=FALSE, col=13)
lines(0:1000/1000,dbeta(0:1000/1000,1/2,1/2), col=2)

plot of chunk unnamed-chunk-1

acf(r.beta2)

#########
# Step 5
# In the three examples above, estimate the auto-correlation functions
# of the simulated process (use the R function acf).
# DONE - see answer above.
# acf computes  (and by default plots) estimates of the autocovariance or autocorrelation function.

###########
# Day 5
#Excercise 1.1
rm(list=ls())
# Consider the function h(x ) = [cos (50x ) + sin(20x )]^2
#Plot this function on [0,1]
x=seq(from=0,to=1,by=0.01) # Defining the interval [0,1] with 100 points i.e. 0.01 interval.
h=function(x){(cos(50*x)+sin(20*x))^2} # Defining the function.
plot(h,type='l',main='h(x)',las=1,col=2) # Plot

plot of chunk unnamed-chunk-1

########
# Exercise 1.2
# Now we want to write an algorithm to compute the integral of h(x) with stocastic simulations.
# We consider sampling from a uniform distribution.
y=runif(100, min=0, max=1) # The stochastic uniform distribution function.
h1=(cos(50*y)+sin(20*y))^2
mean(h1)# The function

## [1] 0.8847

plot(h1,type='l',main='h1(y)',las=1,col=2);abline(h=mean(h1),col=4,lty=2)

#########
# Exercise 1.3
# Compare the results obtained in 2 mean(h1), with the R function integrate()
integrate(h,lower=0,upper=1) # Computing the integral of the function h.

## 0.9652 with absolute error < 1.9e-10

# We can see that this is very close to the mean of the simulated sample h1.

#########
# Day 5 - Opt. Excercise I
h<-function(x){3*dnorm(x,m=-2,sd=0.2)+6*dnorm(x,m=1,sd=0.5)+1*dnorm(x,m=3,sd=0.3)} # The function
n=100 # Our sample size.
U <- runif(n=n,min=-5,max=5) # Using the uniform simulation method, with n values in the interval [-5,5].

TheMaX <- which.max(h(U)) # This determines the index of the uniform sample, which leads to the maximum of the function.
XMaX <- U[TheMaX] # Shows the maximum value of x.
hMaX <- h(XMaX) # Shows the maximum value of the function.

# Comparing the result to the R function optimize.
optimize(h,lower=-10,upper=10,maximum=T) # Using the function to find maximum value in a given interval.

## $maximum
## [1] -2
## 
## $objective
## [1] 5.984

# By comparing the results with the one obtained in the first step, we see the results are very similar.

##########
rm(list=ls())
# Excecise I step 2.
# Let's implement the simulated annealing method to maximize the function h on [-3,3]^2.
h = function(x,y)
{
  3* dnorm(x,m=0,sd=.5)*dnorm(y,m=0,sd=.5) +
    dnorm(x,m=-1,sd=.5)*dnorm(y,m=1,sd=.3) +
    dnorm(x,m=1,sd=.5)*dnorm(y,m=1,sd=.3)}
# Now we can define the interval.
x = y = seq(-3,3,0.01)
H = outer(x,y,h) # Storing the values in a matrix.
# Let's plot the function to check the behavior of the sequence.
image(x,y,H,las=1)
contour(x,y,H,add=TRUE,) # Adding contours to the plot.

plot of chunk unnamed-chunk-1