#Complementaria 4 2023-20

# Importar datos ----------------------------------------------------------

# Option 1: base R reader.
# "AAPL.csv" is a relative path, so it is resolved against the current
# working directory (printed by getwd() below).
Datos <- read.csv(file = "AAPL.csv", sep = ",")

# View() opens a data viewer window, which only makes sense in an interactive
# session; guarding it keeps the script runnable in batch mode (Rscript).
if (interactive()) {
  View(Datos)
}
getwd()
## [1] "C:/Users/mayer/OneDrive - Universidad de los Andes/2 SEMESTRE MAESTRIA/Modelos/Complementaria 4"

# Option 2: readr reader. It reports the column types it guessed (recorded
# output below).
library(readr)
AAPL <- read_csv("AAPL.csv")
## Rows: 21 Columns: 7
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl  (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
if (interactive()) {
  View(AAPL)
}

# Class of the object returned by read.csv()
class(Datos)
## [1] "data.frame"
# Dataframes --------------------------------------------------------------

# Convert the readr result into a plain data.frame
datos <- as.data.frame(AAPL)


# Access a single cell: by (row, column) position, or by row and column name

datos[3, 4]
## [1] 149.95
datos[1, "Low"]
## [1] 148.57
# Number of columns and rows
ncol(datos)
## [1] 7
nrow(datos)
## [1] 21
# Per-column descriptive summary
summary(datos)
##       Date                 Open            High            Low       
##  Min.   :2017-07-17   Min.   :148.8   Min.   :150.1   Min.   :147.3  
##  1st Qu.:2017-07-24   1st Qu.:150.0   1st Qu.:150.9   1st Qu.:148.9  
##  Median :2017-07-31   Median :153.4   Median :153.9   Median :151.8  
##  Mean   :2017-07-30   Mean   :153.9   Mean   :155.0   Mean   :152.6  
##  3rd Qu.:2017-08-07   3rd Qu.:157.1   3rd Qu.:158.9   3rd Qu.:156.1  
##  Max.   :2017-08-14   Max.   :159.9   Max.   :161.8   Max.   :159.1  
##      Close         Adj Close         Volume        
##  Min.   :148.7   Min.   :148.1   Min.   :15781000  
##  1st Qu.:150.3   1st Qu.:149.7   1st Qu.:19845900  
##  Median :152.7   Median :152.1   Median :22028200  
##  Mean   :153.8   Mean   :153.3   Mean   :26571690  
##  3rd Qu.:157.1   3rd Qu.:156.5   3rd Qu.:27097300  
##  Max.   :161.1   Max.   :160.4   Max.   :69936800
# Access whole columns: four equivalent ways of extracting a column vector
datos[["Close"]]
##  [1] 149.56 150.08 151.02 150.34 150.27 152.09 152.74 153.46 150.56 149.50
## [11] 148.73 150.05 157.14 155.57 156.39 158.81 160.08 161.06 155.32 157.48
## [21] 159.85
datos[[3]]
##  [1] 150.90 150.13 151.42 151.74 150.44 152.44 153.84 153.93 153.99 150.23
## [11] 150.33 150.22 159.75 157.21 157.40 158.92 161.83 161.27 160.00 158.57
## [21] 160.21
datos[, "Close"]
##  [1] 149.56 150.08 151.02 150.34 150.27 152.09 152.74 153.46 150.56 149.50
## [11] 148.73 150.05 157.14 155.57 156.39 158.81 160.08 161.06 155.32 157.48
## [21] 159.85
datos$Low
##  [1] 148.57 148.67 149.95 150.19 148.88 149.90 151.80 153.06 147.30 149.19
## [11] 148.13 148.41 156.16 155.02 155.69 156.67 158.27 159.11 154.63 156.07
## [21] 158.75
# Access a whole row
datos[4, ]
##         Date  Open   High    Low  Close Adj Close   Volume
## 4 2017-07-20 151.5 151.74 150.19 150.34  149.7519 17243700
# Select several rows, or several rows and columns, at once
datos[3:4, ]
##         Date   Open   High    Low  Close Adj Close   Volume
## 3 2017-07-19 150.48 151.42 149.95 151.02  150.4293 20923000
## 4 2017-07-20 151.50 151.74 150.19 150.34  149.7519 17243700
datos[1:2, c("Low", "Close")]
##      Low  Close
## 1 148.57 149.56
## 2 148.67 150.08
# Filter with a logical condition: Volume on days with Close > 155 and Open < 157
datos[datos$Close > 155 & datos$Open < 157, "Volume"]
## [1] 20559900 26257100
# Create a new column (backticks/quotes are needed: the name contains a space)
datos[["logAdj"]] <- log10(datos[["Adj Close"]])

# Drop a column: a negative index removes the last column (logAdj, just added)
datos <- datos[, -ncol(datos)]

# Drop a row: remove the first observation
datos <- datos[-1, ]



# Graficas ----------------------------------------------------------------

# Histogram of the adjusted closing price

hist(
  x = datos$`Adj Close`,
  main = "Histograma del Precio Cierre Ajustado",
  xlab = "Precio de cierre ajustado",
  ylab = "Frecuencia",
  col = "red"
)

# Lag-1 scatter plot of the log10 adjusted closing price:
# each point pairs the value at t-1 (x axis) with the value at t (y axis).
# The log series is computed once and then shifted to build both axes.
logAdjClose <- log10(datos$`Adj Close`)
ejeX <- head(logAdjClose, -1) # every observation except the last  -> t-1
ejeY <- tail(logAdjClose, -1) # every observation except the first -> t

# The plotted values are log10 prices, so the axis labels say so explicitly.
plot(
  x = ejeX, y = ejeY,
  main = "Grafica de dispersion del precio ajustado de cierre",
  xlab = "log10 del precio ajustado en t-1",
  ylab = "log10 del precio ajustado en t",
  col = "deepskyblue"
)


# Fechas ------------------------------------------------------------------

# Preview the Date column as calendar dates (year-month-day), then store it.
# `format` is passed by name so it cannot be mistaken for another argument.
as.Date(datos$Date, format = "%Y-%m-%d")
##  [1] "2017-07-18" "2017-07-19" "2017-07-20" "2017-07-21" "2017-07-24"
##  [6] "2017-07-25" "2017-07-26" "2017-07-27" "2017-07-28" "2017-07-31"
## [11] "2017-08-01" "2017-08-02" "2017-08-03" "2017-08-04" "2017-08-07"
## [16] "2017-08-08" "2017-08-09" "2017-08-10" "2017-08-11" "2017-08-14"
datos$Date <- as.Date(datos$Date, format = "%Y-%m-%d")

# Date-time (POSIXct) version of the same column.
# The second positional argument of as.POSIXct() is `tz`, not `format`, so a
# format string passed there would be taken as a time-zone name. datos$Date is
# already a Date, so no format is needed; the time zone is stated explicitly.
datos$Date2 <- as.POSIXct(datos$Date, tz = "UTC")


# Ajuste de datos ---------------------------------------------------------

#Paquetes
library(rriskDistributions)

library(fitdistrplus)
## Loading required package: MASS
## Loading required package: survival
library(MASS)


# Continuous variable
# (candidate families: exponential, normal, t, gamma, ...)

# Fix the random seed so the simulated samples below, and everything derived
# from them (estimates, p-values), are the same on every run.
# NOTE: the `##` lines in this section are output recorded from an earlier,
# unseeded run; the exact numbers printed now will differ from them.
set.seed(2023)

# Simulate 300 inter-arrival times of students from an exponential (rate = 3)
tEstudiantes <- rexp(n = 300, rate = 3)

# Get a first feel for the shape of the data
hist(tEstudiantes)

# Fit an exponential distribution to the sample
ajuste <- fitdist(tEstudiantes, "exp")

# Estimated parameter of the fit
ajuste$estimate
##     rate 
## 3.090185
plot(ajuste)

# Goodness-of-fit test
rta <- gofstat(ajuste)
rta$chisqpvalue # chi-squared p-value
## [1] 0.5321226
# A p-value above the chosen significance level (e.g. 0.05) means the
# exponential fit is not rejected.

# Discrete distributions

# Number of students arriving in one hour: 300 draws from a Poisson (lambda = 25)

nEstudiantes <- rpois(n = 300, lambda = 25)

ajuste2 <- fitdist(nEstudiantes, "pois")

# Estimated parameter, standard error and information criteria
summary(ajuste2)
## Fitting of the distribution ' pois ' by maximum likelihood 
## Parameters : 
##        estimate Std. Error
## lambda    25.23       0.29
## Loglikelihood:  -909.6629   AIC:  1821.326   BIC:  1825.03
# Goodness-of-fit test
rta2 <- gofstat(ajuste2)
rta2$chisqpvalue # chi-squared p-value
## [1] 0.6905788
# A p-value above the chosen significance level (e.g. 0.05) means the
# Poisson fit is not rejected.