Tengo como objetivo pronósticar el valor de las acciones de Nvidia en la bolsa.

library(pdfetch)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.2.1     ✔ stringr 1.4.1
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(yuima)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: stats4
## Loading required package: expm
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Attaching package: 'expm'
## 
## The following object is masked from 'package:Matrix':
## 
##     expm
## 
## Loading required package: cubature
## Loading required package: mvtnorm
## ########################################
## This is YUIMA Project package v.1.15.22
## Why don't you try yuimaGUI package?
## Visit: http://www.yuima-project.com
## ########################################
## 
## Attaching package: 'yuima'
## 
## The following object is masked from 'package:tibble':
## 
##     char
## 
## The following object is masked from 'package:stats':
## 
##     simulate
NVDAdata <- pdfetch_YAHOO("NVDA",from = c("2022-01-01"),to = c("2023-01-01"), interval = '1d')
Nvidia <- NVDAdata[,4]
length(Nvidia)
## [1] 251
tsNvidia <- ts(Nvidia, start = c(2022,1),frequency=365)
plot(tsNvidia)

Diferencia de la serie de datos con logaritmo

l_Nvidia<-diff(log(tsNvidia))
plot(l_Nvidia)

Calculo los parámetros con el primer método:

Delta <- 1/365
alpha <- mean(l_Nvidia)/Delta
sigma <- sqrt(var(l_Nvidia)/Delta)
mu <- alpha +0.5*sigma^2
x0<-tsNvidia[1]

Calculo los parámetros con el segundo método:

x <- tsNvidia
gBm <- setModel(drift="mu*x", diffusion="sigma*x", xinit=x0)
## Warning in yuima.warn("Solution variable (lhs) not specified. Trying to use state variables."): 
## YUIMA: Solution variable (lhs) not specified. Trying to use state variables.
mod <- setYuima(model=gBm, data=setData(tsNvidia, delta=Delta))
set.seed(123)
fit <- qmle(mod, start=list(mu=0, sigma=1),
            lower=list(mu=0.1, sigma=0.1),
            upper=list(mu=100, sigma=10))
summary(fit)
## Quasi-Maximum likelihood estimation
## 
## Call:
## qmle(yuima = mod, start = list(mu = 0, sigma = 1), lower = list(mu = 0.1, 
##     sigma = 0.1), upper = list(mu = 100, sigma = 10))
## 
## Coefficients:
##        Estimate Std. Error
## sigma 0.7611538 0.03401626
## mu    0.1000462 0.91970561
## 
## -2 log L: 1695.661

Los comparo:

coef(fit)
##     sigma        mu 
## 0.7611538 0.1000462
sigma
##            NVDA.close
## NVDA.close  0.7609669
mu
##            NVDA.close
## NVDA.close -0.7663989
gbm_vec <- function(nsim = 10000, t = 25, mu = 0, sigma = 0.1, S0 = 100, dt = 1./365) {
  # matrix of random draws - one for each day for each simulation
  epsilon <- matrix(rnorm(t*nsim), ncol = nsim, nrow = t)  
  # get GBM and convert to price paths
  gbm <- exp((mu - sigma * sigma / 2) * dt + sigma * epsilon * sqrt(dt))
  gbm <- apply(rbind(rep(S0, nsim), gbm), 2, cumprod)
  return(gbm)
}

gBm
## 
## Diffusion process
## Number of equations: 1
## Number of Wiener noises: 1
## Parametric model with 2 parameters
valores_simulados <- simulate(gBm, true.parameter = list(mu=mu, sigma=sigma))
## Warning in yuima.warn("'delta' (re)defined."): 
## YUIMA: 'delta' (re)defined.
plot(valores_simulados)

Probando con ambas formas

nsim <- 1000
t <- 260
mu <- 0.1000462
sigma <- 0.7611538
S0 <- 301.21 #X0 segpun entendí
dt = 1/365
gbm <- gbm_vec(nsim, t, mu, sigma, S0, dt)

gbm_df <- as.data.frame(gbm) %>%
  mutate(ix = 1:nrow(gbm)) %>%
  pivot_longer(-ix, names_to = 'sim', values_to = 'price')
gbm_df %>%
  ggplot(aes(x=ix, y=price, color=sim)) +
  geom_line() +
  theme(legend.position = 'none')

data.frame(price = gbm[260, ]) %>%
  ggplot(aes(x = price)) +
  geom_histogram(aes(y = ..density..), binwidth = 0.1) +
  geom_density() + 
  ggtitle('terminal price distribution')

Obtengo el resultado del modelo para los siguientes 5 días:

D <- gbm[252, ] %>%
  density()
D$x[which.max(D$y)]
## [1] 183.7464
D <- gbm[253, ] %>%
  density()
D$x[which.max(D$y)]
## [1] 192.4457
D <- gbm[254, ] %>%
  density()
D$x[which.max(D$y)]
## [1] 185.5706
D <- gbm[255, ] %>%
  density()
D$x[which.max(D$y)]
## [1] 185.0359
D <- gbm[256, ] %>%
  density()
D$x[which.max(D$y)]
## [1] 183.6814