Part 1 - Playing with dates, ts object, charts, loops and functions

ts class object

Creation of a Time series generated by normal distribution starting in January 2000, ending in April 2012 each quarter

tseries <- ts(rnorm(100),start = c(2000,1), end = c(2012,4),  frequency = 4)
head(tseries)
## [1]  0.55038826 -0.65991366 -0.18029590 -0.91495211  1.93175676 -0.09766777

Multiple Time Series : We can plot multiple time series in one chart by combining both the series into a matrix.

Get the data points in form of a R vector.

ts.1 <- c(799,1174.8,865.1,1334.6,635.4,918.5,685.5,998.6,784.2,985,882.8,1071)
ts.2 <- c(655,1306.9,1323.4,1172.2,562.2,824,822.4,1265.5,799.6,1105.6,1106.7,1337.8) 
# Convert them to a matrix.
matrice <- matrix(c(ts.1,ts.2),nrow = 12)
head(matrice)
##        [,1]   [,2]
## [1,]  799.0  655.0
## [2,] 1174.8 1306.9
## [3,]  865.1 1323.4
## [4,] 1334.6 1172.2
## [5,]  635.4  562.2
## [6,]  918.5  824.0

Convert it to a time series object.

rainfall.ts <- ts(matrice,start = c(2012,1),frequency = 12)

Combining time series using the period of time as the criteria : Binding several time series together into a single multivariate time series supposes the same data frequency. There exists two ways to bind time series : the union or the intersection of the time domains of the component series. Exemple :

ts1 <- ts(101:106, freq=4, start=2012.75) 
ts.intersect(ts1, lag(ts1, -1))
##         ts1 lag(ts1, -1)
## 2013 Q1 102          101
## 2013 Q2 103          102
## 2013 Q3 104          103
## 2013 Q4 105          104
## 2014 Q1 106          105
ts.union(ts1, lag(ts1, -1))
##         ts1 lag(ts1, -1)
## 2012 Q4 101           NA
## 2013 Q1 102          101
## 2013 Q2 103          102
## 2013 Q3 104          103
## 2013 Q4 105          104
## 2014 Q1 106          105
## 2014 Q2  NA          106
cbind(ts1, lag(ts1, -1))
##         ts1 lag(ts1, -1)
## 2012 Q4 101           NA
## 2013 Q1 102          101
## 2013 Q2 103          102
## 2013 Q3 104          103
## 2013 Q4 105          104
## 2014 Q1 106          105
## 2014 Q2  NA          106

Dates class object

Different time formats:

as.Date('1915-6-16')
## [1] "1915-06-16"
as.Date('1990/02/17')
## [1] "1990-02-17"
as.Date('1/15/2001',format='%m,/%d/,%Y') 
## [1] NA
as.Date('April26,2001',format='%B,%d,%Y')
## [1] NA
period <- as.Date(c("2007-06-22", "2004-02-13")) 
# number of days between 6/22/07 and 2/13/04 
days <- period [1] - period [2]

Plotting Time Series

Packages could be installed by these commands: install.packages(“TSstudio”)

library(TSstudio)

data(USgas) 
ts_info(USgas)
##  The USgas series is a ts object with 1 variable and 238 observations
##  Frequency: 12 
##  Start time: 2000 1 
##  End time: 2019 10
ts_plot(USgas)
ts_plot(USgas ,
        title = "US Monthly Natural Gas Consumption", Xtitle = "Time",
        Ytitle = "Billion Cubic Feet")
ts_plot(USgas ,
        title = "US Monthly Natural Gas Consumption", Xtitle = "Time",
        Ytitle = "Billion Cubic Feet",
        slider = TRUE)
data("Coffee_Prices") 
ts_info(Coffee_Prices)
##  The Coffee_Prices series is a mts object with 2 variables and 701 observations
##  Frequency: 12 
##  Start time: 1960 1 
##  End time: 2018 5
ts_plot(USgas ,
        title = "US␣Monthly Natural Gas Consumption", Xtitle = "Time",
        Ytitle = "Billion Cubic Feet",
        line.mode = "lines+markers")
ts_plot(Coffee_Prices , type = "multiple")
# Libraries
library(ggplot2) 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Generating data within an uniform distribution
data <- data.frame(day = as.Date("2017-06-14") - 0:364,
value = runif(365) + seq(-140, 224)^2 / 10000)

# Most basic bubble plot
p <- ggplot(data, aes(x=day, y=value)) + geom_line() +xlab("")

p+scale_x_date(date_labels = "%b") 

p+scale_x_date(date_labels = "%Y␣%b␣%d")

p+scale_x_date(date_labels = "%W") 

p+scale_x_date(date_labels = "%m-%Y")

# Libraries
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
# Dummy data
data <- data.frame(day = as.Date("2017-06-14") - 0:364,
value = runif(365) + seq(-140, 224)^2 / 10000)

# Most basic bubble plot
p <- ggplot(data, aes(x=day, y=value)) + geom_line( color="steelblue") + geom_point() +xlab("") +theme_ipsum() + theme(axis.text.x=element_text(angle=60, hjust=1)) + scale_x_date(limit=c(as.Date("2017-01-01"),as.Date("2017-02-11"))) + ylim (0 ,1.5)

Loops and conditional expressions

a <- 0
if(a!=0){ 
  print(1/a)
} else{ 
    print("No reciprocal for 0.")
  }
## [1] "No reciprocal for 0."
murder_rate <- list(1, 0.6, 0.9)
ind <- which.min(murder_rate)
if(murder_rate[ind] < 0.5){ print(murders$state[ind])
} else{ print("No state has murder rate that low")
}
## [1] "No state has murder rate that low"
# If we try it again with a rate of 0.25, we get a different answer :
if(murder_rate[ind] < 0.25){ print(murders$state[ind])
} else{ print("No state has a murder rate that low.")
}
## [1] "No state has a murder rate that low."
# iterating loops
for (i in 1: 4) {
  print(log(i))
  }
## [1] 0
## [1] 0.6931472
## [1] 1.098612
## [1] 1.386294
for (i in c(-8, 9, 11, 45)){
print(i^2)}
## [1] 64
## [1] 81
## [1] 121
## [1] 2025
fruits <- list("apple", "banana", "cherry")
for (x in fruits) {
if (x == "cherry") {
break
}
print(x) }
## [1] "apple"
## [1] "banana"
for (i in 1:3) {
for (j in 1:i) {
print(i + j) }
}
## [1] 2
## [1] 3
## [1] 4
## [1] 4
## [1] 5
## [1] 6
mt= 5
nt =5
cter =0
mym = matrix(0,mt,nt) 
for (i in 0:mt) 
  {for (j in 0:nt) 
    { if(i==j) 
      {
      break;
      }else
        {
          mym[i,j] = i*j
          cter=cter+1
        }
    }
  print(i*j)
  } 
## [1] 0
## [1] 1
## [1] 4
## [1] 9
## [1] 16
## [1] 25
print(cter)
## [1] 15

Next and Break Statement

x <- 1:5
for (i in x) {
if (i == 3){ break
}
print(i) }
## [1] 1
## [1] 2
x <- 1:5 
for (i in x)
{if (i==2)
{
  next
}
print(i)
}
## [1] 1
## [1] 3
## [1] 4
## [1] 5

Part 2 - Exercises

Exercise 1-a Studying a Gaussian White Noise

1. Generate 1000 observations from a normal distribution with 0 mean and a standard deviation of 1.7. This process is named (ut).

# Generate 1000 observations
set.seed(2) 

ut <- rnorm(1000, mean = 0, sd = 1.7) 

# Print the first 10 observations
print(head(ut, 10))
##  [1] -1.5247547  0.3142436  2.6993371 -1.9216386 -0.1364280  0.2251145
##  [7]  1.2035230 -0.4074866  3.3736057 -0.2359379

2. Using a function plot, plot the generated series.

plot(ut, type = "l", xlab = "Index", ylab = "Value", main = "Generated Series")

acf(ut, main = "Autocorrelation Function of ut")

3. Explain what is an autocorrelation function. Plot the ACF of (ut). How do you interpret the ACF results ? Are they aligned with the definition of the white noise.

An autocorrelation function (ACF) is a mathematical tool used in time series analysis to measure the linear relationship between a time series and its lagged values. It provides information about the presence and strength of correlation between the observations at different time lags.

We could see in the graph that autocorrelationis quite weak: the ACF is close to zero or not statistically significant for all lags, it indicates that the current data point is not related to its lagged values. This implies the absence of a consistent pattern or trend in the data, which aligned with the definition of white noise

Exercise 1-b Studying a random walk

We suggest to run the same analysis using a random walk process. Formally speaking, yt : yt = yt−1 + ut A random walk can be seen as a cumulative sum of Gaussian white noises increments.

1. Generate a random walk based on the simulated white noise (keep the same seed)

set.seed(2)
white_noise <- rnorm(1000, mean = 0, sd = 1.7) 
random_walk <- cumsum(white_noise)

2. Convert the simulated data into a ts object (choose the the start date and freq). Plot the generated

series. Plot its density. What do you observe ?

# Convert the random walk series into a ts object
random_walk_ts <- ts(random_walk, start = c(1850,1), frequency = 4)

# Plot the random walk series
plot(random_walk_ts, main = "Random Walk Series")

# Plot the density of the random walk series
plot(density(random_walk_ts), main = "Density Plot of Random Walk Series")

Observations: - Series Plot: The plot of the random walk series shows a cumulative upward or downward trend, indicating the persistence or accumulation of the random increments.

Density Plot: The density plot of the random walk series typically resemble a bell-shaped distribution centered around zero. It will have fatter tails compared to a normal distribution, indicating the accumulated effect of the random increments.

These observations highlight the key characteristics of a random walk process, where the series exhibits a trend and the increments are accumulated over time.

3. Plot the ACF of (wnt). How do you interpret this results? Can the models seen during the class be used for this type of data ? Why ?

acf(random_walk_ts, main = "Autocorrelation Function of Random Walk")

We could observe a slow decay in autocorrelation.The slow decay in autocorrelation reflects the cumulative effect of the random walk, where each value is the sum of all preceding values.we see positive autocorrelation at positive lags, indicating a clear persistence or trend in the data. Thus, the ACF results for a random walk process are not aligned with the definition of white noise.

4. Run the Ljung Box test for the processes given in the exercise 1-a and 1-b and for a different lags (a for loop could be usefull). Plot the values of the Q(m) and conclude.

#Serie 1 - a
Box.test(random_walk, lag = 20, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  random_walk
## X-squared = 19117, df = 20, p-value < 2.2e-16
#Serie 1 - b
Box.test(random_walk_ts, lag = 20, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  random_walk_ts
## X-squared = 19117, df = 20, p-value < 2.2e-16
w.n_TestStat = matrix(0,20)
for(i in 1:20){
      w.n_BoxTest = Box.test(random_walk, lag = i, type = "Ljung")
      w.n_TestStat[i] = w.n_BoxTest$statistic
    }
    
plot(w.n_TestStat)

Exercise 2 - Importing, plotting and analyzing data

1. Import the database contained in the cvs file using the read.csv function. This database is a collection of stock prices.

data<-read.csv("/Users/chenjieli/Desktop/ESILV/ESILV A4/time series/cac40.csv",header=TRUE, sep=";")


library(ggfortify)
library(ggplot2)

head(data)
##    Name   LVMH L.OREAL HERMES.INTL. TOTALENERGIES  SANOFI AIRBUS BNP.PARIBAS
## 1 42370 144.90  155.30     311.7500        41.265 78.1900  62.00       52.23
## 2 42373 139.20  150.90     311.1499        40.525 77.1654  60.86       51.22
## 3 42374 139.45  151.50     304.1001        40.155 78.0806  62.09       51.49
## 4 42375 136.70  151.30     301.0000        39.770 77.0261  61.27       50.50
## 5 42376 136.60  150.85     300.5000        38.805 76.1905  60.04       49.39
## 6 42377 135.35  149.60     301.4500        37.350 74.2805  58.76       48.00
##   SCHNEIDER.ELECTRIC L.AIR.LQE.SC.ANYME..POUR.L.ETUDE.ET.L.EPXTN.
## 1             52.560                                      75.8498
## 2             51.200                                      73.8008
## 3             51.430                                      73.3618
## 4             50.520                                      72.3373
## 5             49.480                                      70.6688
## 6             49.655                                      69.5931
##   ESSILORLUXOTTICA   KERING    AXA STELLANTIS PERNOD.RICARD VINCI SAFRAN
## 1           115.05 146.7892 25.230     6.6947        105.20 59.14  63.37
## 2           111.80 141.0738 24.335     6.3694        102.00 57.73  62.27
## 3           112.85 140.2374 24.345     6.5375        102.00 58.05  61.94
## 4           112.15 139.6333 24.255     6.1975        101.50 57.71  61.50
## 5           111.85 134.7078 23.690     5.9943         99.98 57.05  60.30
## 6           110.95 132.8491 23.245     5.7481         99.16 56.86  59.61
##   CREDIT.AGRICOLE DANONE DASSAULT.SYSTEMES STMICROELECTRONICS  ENGIE
## 1          10.880  62.28            14.754              6.181 16.325
## 2          10.620  60.81            14.308              6.054 15.720
## 3          10.665  61.19            14.332              6.065 15.750
## 4          10.410  60.70            14.158              5.927 15.645
## 5          10.155  59.75            14.066              5.830 15.440
## 6          10.015  59.73            13.882              5.580 15.050
##   SOCIETE.GENERALE ORANGE CAPGEMINI LEGRAND SAINT.GOBAIN THALES ARCELORMITTAL
## 1           42.570 15.485     85.60   52.20       39.850  69.10        9.0652
## 2           41.395 15.200     82.85   51.00       38.810  68.04        8.6558
## 3           41.730 15.310     83.42   50.99       38.835  68.79        9.1838
## 4           40.925 15.540     82.80   50.25       38.095  68.45        8.6000
## 5           39.935 15.495     81.32   49.72       36.455  68.69        8.1231
## 6           38.905 15.300     79.85   49.46       36.120  69.25        7.8672
##   VEOLIA.ENVIRON CARREFOUR PUBLICIS.GROUPE RENAULT  ALSTOM CMPG.DES.ETS.MICH.
## 1        21.0816    26.650           61.38   92.63 22.7338            21.9750
## 2        20.7875    26.015           59.55   89.26 22.3504            21.3975
## 3        20.7441    26.020           59.74   89.44 22.3423            21.2400
## 4        20.6429    25.570           57.14   87.41 22.1849            20.6700
## 5        20.4597    25.320           55.20   85.47 22.0154            20.4100
## 6        20.3970    25.150           55.20   83.77 22.3020            20.3600
##   TELEPERFORMANCE BOUYGUES EUROFINS.SCIEN. UNIBAIL.RODAMCO.WE.STAPLED.UNITS
## 1           77.50   36.545          32.185                           234.40
## 2           75.98   37.155          32.115                           229.70
## 3           75.62   37.300          31.885                           232.45
## 4           75.00   36.680          31.795                           231.10
## 5           73.56   36.130          30.695                           226.10
## 6           73.04   35.715          30.850                           223.10
##   VIVENDI WORLDLINE
## 1  8.2073    23.870
## 2  7.9263    23.115
## 3  7.9655    23.510
## 4  7.8767    22.700
## 5  7.8767    22.140
## 6  7.7486    22.095
plot(data[,2], type ="l",col="blue")

2. Plot the third and fifth columns of this dataframe using either the ts_plot or the ggplot2 frame- work.

ggplot(data) +
  geom_line(aes(x = seq_along(data[, 3]), y = data[, 3], color = "L'Oreal")) +
  geom_line(aes(x = seq_along(data[, 5]), y = data[, 5], color = "Total")) +
  labs(title = "Stock Prices", x = "Observation", y = "Value", color = "Column") +
  scale_color_manual(values = c("red", "blue"))

3. Plot the density of the selected stock daily returns? Using a quantile plot, check the normality of the empirical distribution of the daily returns. Display the four charts within the same output window 6

# Calculate daily returns
returns <- diff(data$HERMES.INTL.) / lag(data$HERMES.INTL.)[-1]

# Plot density of daily returns
plot(density(returns), main = "Density of Daily Returns")

# Quantile-Quantile (Q-Q) plot
qqnorm(returns, main = "Normal Q-Q Plot")
qqline(returns)

# Histogram of daily returns
hist(returns, main = "Histogram of Daily Returns")

# Boxplot of daily returns
boxplot(returns, main = "Boxplot of Daily Returns")

4. Calculate the autocovariance and the autocorrelation functions of the daily returns and calculate the Ljung-Box text for the various numbers of lags.

# Calculate autocovariance and autocorrelation functions
acf_returns <- acf(returns, lag.max = 10, plot = FALSE)
pacf_returns <- pacf(returns, lag.max = 10, plot = FALSE)

# Perform Ljung-Box test
ljung_box_test <- Box.test(returns, lag = 10, type = "Ljung-Box")

# Print the results
print(acf_returns)
## 
## Autocorrelations of series 'returns', by lag
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##  1.000 -0.033  0.035 -0.006  0.019  0.012 -0.077  0.008 -0.005  0.013  0.011
print(pacf_returns)
## 
## Partial autocorrelations of series 'returns', by lag
## 
##      1      2      3      4      5      6      7      8      9     10 
## -0.033  0.034 -0.003  0.018  0.013 -0.078  0.002  0.000  0.011  0.015
print(ljung_box_test)
## 
##  Box-Ljung test
## 
## data:  returns
## X-squared = 16.764, df = 10, p-value = 0.07975
squared_returns <- returns^2

5. Plot the same ACF of the daily squared returns and calculate the Ljung-Box text for the various numbers of lags.

Plot ACF of squared returns

acf_squared_returns <- acf(squared_returns, lag.max = 10, plot = TRUE, main = "ACF of Squared Returns")

# Perform Ljung-Box test on squared returns
ljung_box_test_squared <- Box.test(squared_returns, lag = 10, type = "Ljung-Box")

# Print the Ljung-Box test results
print(ljung_box_test_squared)
## 
##  Box-Ljung test
## 
## data:  squared_returns
## X-squared = 371.38, df = 10, p-value < 2.2e-16
plot(acf_squared_returns)

Exercise 3 - The Maximum likelihood estimator and functions

1. The purpose of this exercise is now to write a function that calculate :

■ The likelihood of a sample generated by a exponential distribution ; ■ And find the parameter θ underlying to this sample.

# Exemple exercice 3 avec loi de Poisson
# Génération d'un échantillon
d.poiss <- rpois(1000, 0.8)

# Fonction qui calcule la log-vraisemblance
log_like_poisson <- function(mu, y) {
  n <- length(y)
  sum(y * log(mu) - mu - log(factorial(y)))
}

# Optimisation de la log-vraisemblance
optim_result <- optim(1, log_like_poisson, y = d.poiss, method = "BFGS")

# Affichage du résultat
cat("Maximum de la log-vraisemblance :", -optim_result$value, "\n")
## Maximum de la log-vraisemblance : 3.184583e+14
cat("Paramètre estimé :", optim_result$par, "\n")
## Paramètre estimé : 318458342036
# Vérification en utilisant la fonction optimise
optimise_result <- optimise(function(mu) -log_like_poisson(mu, d.poiss), interval = c(0, 100), maximum = TRUE)

# Affichage du résultat
cat("Maximum de la log-vraisemblance (vérification) :", -optimise_result$objective, "\n")
## Maximum de la log-vraisemblance (vérification) : -96387.55
cat("Paramètre estimé (vérification) :", optimise_result$maximum, "\n")
## Paramètre estimé (vérification) : 99.99996

2. Answer to the same questions considering this time a ample generated by a Gaussian distribution.

# Example with Gaussian distribution
# Generate a sample
d.gaussian <- rnorm(1000, mean = 2, sd = 1)

# Function that calculates the log-likelihood
log_like_gaussian <- function(params, y) {
  mean <- params[1]
  sd <- params[2]
  -sum(dnorm(y, mean = mean, sd = sd, log = TRUE))
}

# Optimisation of the log-likelihood
optim_result <- optim(c(0, 1), log_like_gaussian, y = d.gaussian, method = "BFGS")
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced

## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced

## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced

## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced

## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced

## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced

## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced

## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced

## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced

## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced

## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced

## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
# Display the result
cat("Maximum of the log-likelihood:", -optim_result$value, "\n")
## Maximum of the log-likelihood: -1447.227
cat("Estimated parameters:", optim_result$par, "\n")
## Estimated parameters: 2.046812 1.028693