Create a time series of normally distributed values, starting in the first quarter of 2000 and ending in the fourth quarter of 2012, at quarterly frequency (since start and end are both given, ts() truncates the 100 generated values to the 52 implied quarters):
tseries <- ts(rnorm(100),start = c(2000,1), end = c(2012,4), frequency = 4)
head(tseries)
## [1] 0.55038826 -0.65991366 -0.18029590 -0.91495211 1.93175676 -0.09766777
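The attributes of the series can be inspected directly (a quick sketch using base functions):
start(tseries) # 2000 1
end(tseries) # 2012 4
frequency(tseries) # 4
window(tseries, start = c(2011, 1), end = c(2011, 4)) # extract a sub-period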
Store the data points in R vectors:
ts.1 <- c(799,1174.8,865.1,1334.6,635.4,918.5,685.5,998.6,784.2,985,882.8,1071)
ts.2 <- c(655,1306.9,1323.4,1172.2,562.2,824,822.4,1265.5,799.6,1105.6,1106.7,1337.8)
# Convert them to a matrix.
matrice <- matrix(c(ts.1,ts.2),nrow = 12)
head(matrice)
## [,1] [,2]
## [1,] 799.0 655.0
## [2,] 1174.8 1306.9
## [3,] 865.1 1323.4
## [4,] 1334.6 1172.2
## [5,] 635.4 562.2
## [6,] 918.5 824.0
Convert it to a time series object.
rainfall.ts <- ts(matrice,start = c(2012,1),frequency = 12)
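A quick way to inspect the result (a minimal sketch using base graphics; the title is illustrative):
# Plot both monthly series stored in the mts object, one panel each
plot(rainfall.ts, main = "Monthly rainfall, two series", xlab = "Time")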
Combining time series by time period: binding several time series into a single multivariate time series requires that they share the same frequency. There are two ways to bind time series: taking the union or the intersection of the time domains of the component series. Example:
ts1 <- ts(101:106, freq=4, start=2012.75)
ts.intersect(ts1, lag(ts1, -1))
## ts1 lag(ts1, -1)
## 2013 Q1 102 101
## 2013 Q2 103 102
## 2013 Q3 104 103
## 2013 Q4 105 104
## 2014 Q1 106 105
ts.union(ts1, lag(ts1, -1))
## ts1 lag(ts1, -1)
## 2012 Q4 101 NA
## 2013 Q1 102 101
## 2013 Q2 103 102
## 2013 Q3 104 103
## 2013 Q4 105 104
## 2014 Q1 106 105
## 2014 Q2 NA 106
cbind(ts1, lag(ts1, -1))
## ts1 lag(ts1, -1)
## 2012 Q4 101 NA
## 2013 Q1 102 101
## 2013 Q2 103 102
## 2013 Q3 104 103
## 2013 Q4 105 104
## 2014 Q1 106 105
## 2014 Q2 NA 106
Different time formats:
as.Date('1915-6-16')
## [1] "1915-06-16"
as.Date('1990/02/17')
## [1] "1990-02-17"
as.Date('1/15/2001', format = '%m/%d/%Y')
## [1] "2001-01-15"
as.Date('April26,2001', format = '%B%d,%Y') # %B matches full month names (English locale)
## [1] "2001-04-26"
period <- as.Date(c("2007-06-22", "2004-02-13"))
# Number of days between 2007-06-22 and 2004-02-13
days <- period[1] - period[2]
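Subtracting two Date objects yields a difftime; a quick sketch of how to inspect it or force a specific unit:
days
## Time difference of 1225 days
difftime(period[1], period[2], units = "weeks") # make the unit explicit
## Time difference of 175 weeks
as.numeric(days) # drop the difftime class
## [1] 1225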
Packages can be installed and loaded with the following commands: install.packages("TSstudio")
library(TSstudio)
data(USgas)
ts_info(USgas)
## The USgas series is a ts object with 1 variable and 238 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2019 10
ts_plot(USgas)
ts_plot(USgas,
title = "US Monthly Natural Gas Consumption", Xtitle = "Time",
Ytitle = "Billion Cubic Feet")
ts_plot(USgas,
title = "US Monthly Natural Gas Consumption", Xtitle = "Time",
Ytitle = "Billion Cubic Feet",
slider = TRUE)
data("Coffee_Prices")
ts_info(Coffee_Prices)
## The Coffee_Prices series is a mts object with 2 variables and 701 observations
## Frequency: 12
## Start time: 1960 1
## End time: 2018 5
ts_plot(USgas,
title = "US Monthly Natural Gas Consumption", Xtitle = "Time",
Ytitle = "Billion Cubic Feet",
line.mode = "lines+markers")
ts_plot(Coffee_Prices, type = "multiple")
# Libraries
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Generate data: uniform noise plus a quadratic trend
data <- data.frame(day = as.Date("2017-06-14") - 0:364,
value = runif(365) + seq(-140, 224)^2 / 10000)
# Basic line chart
p <- ggplot(data, aes(x = day, y = value)) + geom_line() + xlab("")
p + scale_x_date(date_labels = "%b")       # abbreviated month name
p + scale_x_date(date_labels = "%Y %b %d") # year, month, day
p + scale_x_date(date_labels = "%W")       # week of the year
p + scale_x_date(date_labels = "%m-%Y")    # month-year
# Libraries
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
# Dummy data
data <- data.frame(day = as.Date("2017-06-14") - 0:364,
value = runif(365) + seq(-140, 224)^2 / 10000)
# Line chart with points, a custom theme, and a restricted date range and y-axis
p <- ggplot(data, aes(x = day, y = value)) +
geom_line(color = "steelblue") +
geom_point() +
xlab("") +
theme_ipsum() +
theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
scale_x_date(limit = c(as.Date("2017-01-01"), as.Date("2017-02-11"))) +
ylim(0, 1.5)
p
a <- 0
if(a!=0){
print(1/a)
} else{
print("No reciprocal for 0.")
}
## [1] "No reciprocal for 0."
murder_rate <- c(1, 0.6, 0.9) # a numeric vector, since which.min() and < need atomic vectors
ind <- which.min(murder_rate)
if(murder_rate[ind] < 0.5){
print(murders$state[ind]) # murders is the dslabs package's murders dataset
} else{
print("No state has a murder rate that low.")
}
## [1] "No state has a murder rate that low."
# Trying again with a lower threshold of 0.25:
if(murder_rate[ind] < 0.25){
print(murders$state[ind])
} else{
print("No state has a murder rate that low.")
}
## [1] "No state has a murder rate that low."
# Iterating with for loops
for (i in 1: 4) {
print(log(i))
}
## [1] 0
## [1] 0.6931472
## [1] 1.098612
## [1] 1.386294
for (i in c(-8, 9, 11, 45)) {
print(i^2)
}
## [1] 64
## [1] 81
## [1] 121
## [1] 2025
fruits <- list("apple", "banana", "cherry")
for (x in fruits) {
if (x == "cherry") {
break
}
print(x)
}
## [1] "apple"
## [1] "banana"
for (i in 1:3) {
for (j in 1:i) {
print(i + j)
}
}
## [1] 2
## [1] 3
## [1] 4
## [1] 4
## [1] 5
## [1] 6
mt <- 5
nt <- 5
cter <- 0
mym <- matrix(0, mt, nt)
for (i in 0:mt) {
for (j in 0:nt) {
if (i == j) {
break # exit the inner loop as soon as i equals j
} else {
mym[i, j] <- i * j # assignments with a 0 index are silently ignored
cter <- cter + 1
}
}
print(i * j) # j stopped at i, so this prints i^2
}
## [1] 0
## [1] 1
## [1] 4
## [1] 9
## [1] 16
## [1] 25
print(cter)
## [1] 15
x <- 1:5
for (i in x) {
if (i == 3) {
break
}
print(i)
}
## [1] 1
## [1] 2
x <- 1:5
for (i in x) {
if (i == 2) {
next
}
print(i)
}
## [1] 1
## [1] 3
## [1] 4
## [1] 5
# Generate 1000 observations
set.seed(2)
ut <- rnorm(1000, mean = 0, sd = 1.7)
# Print the first 10 observations
print(head(ut, 10))
## [1] -1.5247547 0.3142436 2.6993371 -1.9216386 -0.1364280 0.2251145
## [7] 1.2035230 -0.4074866 3.3736057 -0.2359379
plot(ut, type = "l", xlab = "Index", ylab = "Value", main = "Generated Series")
acf(ut, main = "Autocorrelation Function of ut")
An autocorrelation function (ACF) is a tool used in time series analysis to measure the linear relationship between a time series and its lagged values. It shows the presence and strength of correlation between observations at different time lags.
We can see in the graph that the autocorrelation is weak: the ACF is close to zero and not statistically significant at all lags, indicating that the current data point is not related to its lagged values. This implies the absence of any consistent pattern or trend in the data, which aligns with the definition of white noise.
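As a sanity check, the lag-1 autocorrelation can also be computed by hand and compared with the value reported by acf() (a minimal sketch; like acf(), it uses the biased denominator n):
# Lag-1 sample autocorrelation, computed manually
u_bar <- mean(ut)
n <- length(ut)
rho1 <- sum((ut[-1] - u_bar) * (ut[-n] - u_bar)) / sum((ut - u_bar)^2)
rho1 # should match acf(ut, plot = FALSE)$acf[2]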
We now run the same analysis on a random walk process. Formally, y_t = y_{t-1} + u_t, where u_t is Gaussian white noise: a random walk can be seen as the cumulative sum of white-noise increments.
set.seed(2)
white_noise <- rnorm(1000, mean = 0, sd = 1.7)
random_walk <- cumsum(white_noise)
Convert the random walk into a time series object and plot the series. Plot its density. What do you observe?
# Convert the random walk series into a ts object
random_walk_ts <- ts(random_walk, start = c(1850,1), frequency = 4)
# Plot the random walk series
plot(random_walk_ts, main = "Random Walk Series")
# Plot the density of the random walk series
plot(density(random_walk_ts), main = "Density Plot of Random Walk Series")
Observations:
- Series plot: the random walk shows a cumulative upward or downward drift, reflecting the persistence and accumulation of the random increments.
- Density plot: the density of the random walk typically looks roughly bell-shaped, but with fatter tails than a normal distribution, again reflecting the accumulated effect of the random increments.
These observations highlight the key characteristics of a random walk process: the series exhibits trend-like behaviour because the increments accumulate over time.
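The drift-like behaviour follows from Var(y_t) = t * sigma^2, which grows linearly with time. A small simulation illustrates this (a sketch; the number of paths is arbitrary):
set.seed(3)
# Simulate 500 independent random walks of length 1000 (one per column)
paths <- replicate(500, cumsum(rnorm(1000, mean = 0, sd = 1.7)))
# Empirical standard deviation across paths at t = 100 and t = 1000
sd(paths[100, ]) # close to 1.7 * sqrt(100) = 17
sd(paths[1000, ]) # close to 1.7 * sqrt(1000), about 53.8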
acf(random_walk_ts, main = "Autocorrelation Function of Random Walk")
We observe a slow decay in the autocorrelation. This slow decay reflects the cumulative nature of the random walk, where each value is the sum of all preceding increments: the strong positive autocorrelation at every lag indicates clear persistence in the data. Thus, the ACF of a random walk process is not consistent with the definition of white noise.
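Conversely, differencing the random walk recovers the underlying white-noise increments, so the ACF should collapse back to near zero (a quick check):
# The first difference of a random walk is the original white noise
acf(diff(random_walk), main = "ACF of Differenced Random Walk")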
# Series 1 - a: Ljung-Box test on the random walk vector
Box.test(random_walk, lag = 20, type = "Ljung")
##
## Box-Ljung test
##
## data: random_walk
## X-squared = 19117, df = 20, p-value < 2.2e-16
# Series 1 - b: the same data wrapped as a ts object, so the result is identical
Box.test(random_walk_ts, lag = 20, type = "Ljung")
##
## Box-Ljung test
##
## data: random_walk_ts
## X-squared = 19117, df = 20, p-value < 2.2e-16
# Ljung-Box test statistic for lags 1 through 20
w.n_TestStat <- matrix(0, 20)
for(i in 1:20){
w.n_BoxTest <- Box.test(random_walk, lag = i, type = "Ljung")
w.n_TestStat[i] <- w.n_BoxTest$statistic
}
plot(w.n_TestStat)
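Each statistic can be compared with the corresponding chi-squared critical value; for a random walk the statistics dwarf the critical values at every lag (a sketch, with the 5% level chosen arbitrarily):
# 5% chi-squared critical values for df = 1, ..., 20
crit <- qchisq(0.95, df = 1:20)
plot(w.n_TestStat, xlab = "Lag", ylab = "Ljung-Box statistic")
lines(crit, col = "red") # the critical values are tiny by comparison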
data<-read.csv("/Users/chenjieli/Desktop/ESILV/ESILV A4/time series/cac40.csv",header=TRUE, sep=";")
library(ggfortify)
library(ggplot2)
head(data)
## Name LVMH L.OREAL HERMES.INTL. TOTALENERGIES SANOFI AIRBUS BNP.PARIBAS
## 1 42370 144.90 155.30 311.7500 41.265 78.1900 62.00 52.23
## 2 42373 139.20 150.90 311.1499 40.525 77.1654 60.86 51.22
## 3 42374 139.45 151.50 304.1001 40.155 78.0806 62.09 51.49
## 4 42375 136.70 151.30 301.0000 39.770 77.0261 61.27 50.50
## 5 42376 136.60 150.85 300.5000 38.805 76.1905 60.04 49.39
## 6 42377 135.35 149.60 301.4500 37.350 74.2805 58.76 48.00
## SCHNEIDER.ELECTRIC L.AIR.LQE.SC.ANYME..POUR.L.ETUDE.ET.L.EPXTN.
## 1 52.560 75.8498
## 2 51.200 73.8008
## 3 51.430 73.3618
## 4 50.520 72.3373
## 5 49.480 70.6688
## 6 49.655 69.5931
## ESSILORLUXOTTICA KERING AXA STELLANTIS PERNOD.RICARD VINCI SAFRAN
## 1 115.05 146.7892 25.230 6.6947 105.20 59.14 63.37
## 2 111.80 141.0738 24.335 6.3694 102.00 57.73 62.27
## 3 112.85 140.2374 24.345 6.5375 102.00 58.05 61.94
## 4 112.15 139.6333 24.255 6.1975 101.50 57.71 61.50
## 5 111.85 134.7078 23.690 5.9943 99.98 57.05 60.30
## 6 110.95 132.8491 23.245 5.7481 99.16 56.86 59.61
## CREDIT.AGRICOLE DANONE DASSAULT.SYSTEMES STMICROELECTRONICS ENGIE
## 1 10.880 62.28 14.754 6.181 16.325
## 2 10.620 60.81 14.308 6.054 15.720
## 3 10.665 61.19 14.332 6.065 15.750
## 4 10.410 60.70 14.158 5.927 15.645
## 5 10.155 59.75 14.066 5.830 15.440
## 6 10.015 59.73 13.882 5.580 15.050
## SOCIETE.GENERALE ORANGE CAPGEMINI LEGRAND SAINT.GOBAIN THALES ARCELORMITTAL
## 1 42.570 15.485 85.60 52.20 39.850 69.10 9.0652
## 2 41.395 15.200 82.85 51.00 38.810 68.04 8.6558
## 3 41.730 15.310 83.42 50.99 38.835 68.79 9.1838
## 4 40.925 15.540 82.80 50.25 38.095 68.45 8.6000
## 5 39.935 15.495 81.32 49.72 36.455 68.69 8.1231
## 6 38.905 15.300 79.85 49.46 36.120 69.25 7.8672
## VEOLIA.ENVIRON CARREFOUR PUBLICIS.GROUPE RENAULT ALSTOM CMPG.DES.ETS.MICH.
## 1 21.0816 26.650 61.38 92.63 22.7338 21.9750
## 2 20.7875 26.015 59.55 89.26 22.3504 21.3975
## 3 20.7441 26.020 59.74 89.44 22.3423 21.2400
## 4 20.6429 25.570 57.14 87.41 22.1849 20.6700
## 5 20.4597 25.320 55.20 85.47 22.0154 20.4100
## 6 20.3970 25.150 55.20 83.77 22.3020 20.3600
## TELEPERFORMANCE BOUYGUES EUROFINS.SCIEN. UNIBAIL.RODAMCO.WE.STAPLED.UNITS
## 1 77.50 36.545 32.185 234.40
## 2 75.98 37.155 32.115 229.70
## 3 75.62 37.300 31.885 232.45
## 4 75.00 36.680 31.795 231.10
## 5 73.56 36.130 30.695 226.10
## 6 73.04 35.715 30.850 223.10
## VIVENDI WORLDLINE
## 1 8.2073 23.870
## 2 7.9263 23.115
## 3 7.9655 23.510
## 4 7.8767 22.700
## 5 7.8767 22.140
## 6 7.7486 22.095
# Plot the second column (LVMH) as a line
plot(data[,2], type = "l", col = "blue")
ggplot(data) +
geom_line(aes(x = seq_along(data[, 3]), y = data[, 3], color = "L'Oreal")) +
geom_line(aes(x = seq_along(data[, 5]), y = data[, 5], color = "Total")) +
labs(title = "Stock Prices", x = "Observation", y = "Value", color = "Column") +
scale_color_manual(values = c("red", "blue"))
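Mapping each column by hand does not scale to all 40 stocks; a sketch of the usual long-format approach (assuming the tidyr package is installed):
library(tidyr)
# Reshape the two columns to long format: one row per (observation, stock)
long <- pivot_longer(data.frame(obs = seq_len(nrow(data)),
LOreal = data[, 3], Total = data[, 5]),
-obs, names_to = "stock", values_to = "price")
ggplot(long, aes(x = obs, y = price, color = stock)) + geom_line()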
# Calculate daily simple returns: (P_t - P_{t-1}) / P_{t-1}
# (base-R head(x, -1) avoids relying on dplyr's masking of lag())
returns <- diff(data$HERMES.INTL.) / head(data$HERMES.INTL., -1)
# Plot density of daily returns
plot(density(returns), main = "Density of Daily Returns")
# Quantile-Quantile (Q-Q) plot
qqnorm(returns, main = "Normal Q-Q Plot")
qqline(returns)
# Histogram of daily returns
hist(returns, main = "Histogram of Daily Returns")
# Boxplot of daily returns
boxplot(returns, main = "Boxplot of Daily Returns")
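Log returns are a common alternative, since they aggregate over time by simple addition (a sketch; the diagnostics above can be repeated on them):
# Daily log returns: log(P_t / P_{t-1})
log_returns <- diff(log(data$HERMES.INTL.))
plot(density(log_returns), main = "Density of Daily Log Returns")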
# Calculate the autocorrelation and partial autocorrelation functions
acf_returns <- acf(returns, lag.max = 10, plot = FALSE)
pacf_returns <- pacf(returns, lag.max = 10, plot = FALSE)
# Perform Ljung-Box test
ljung_box_test <- Box.test(returns, lag = 10, type = "Ljung-Box")
# Print the results
print(acf_returns)
##
## Autocorrelations of series 'returns', by lag
##
## 0 1 2 3 4 5 6 7 8 9 10
## 1.000 -0.033 0.035 -0.006 0.019 0.012 -0.077 0.008 -0.005 0.013 0.011
print(pacf_returns)
##
## Partial autocorrelations of series 'returns', by lag
##
## 1 2 3 4 5 6 7 8 9 10
## -0.033 0.034 -0.003 0.018 0.013 -0.078 0.002 0.000 0.011 0.015
print(ljung_box_test)
##
## Box-Ljung test
##
## data: returns
## X-squared = 16.764, df = 10, p-value = 0.07975
squared_returns <- returns^2
acf_squared_returns <- acf(squared_returns, lag.max = 10, plot = TRUE, main = "ACF of Squared Returns")
# Perform Ljung-Box test on squared returns
ljung_box_test_squared <- Box.test(squared_returns, lag = 10, type = "Ljung-Box")
# Print the Ljung-Box test results
print(ljung_box_test_squared)
##
## Box-Ljung test
##
## data: squared_returns
## X-squared = 371.38, df = 10, p-value < 2.2e-16
plot(acf_squared_returns)
The squared returns, unlike the raw returns, show strong and persistent autocorrelation, and the Ljung-Box p-value is essentially zero. This is the signature of volatility clustering: large price moves tend to be followed by large moves, even though the returns themselves are close to uncorrelated.
Exercise: compute the likelihood of a sample generated by an exponential distribution, and find the parameter θ underlying this sample.
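A minimal sketch for the exponential case (θ taken as the rate parameter; its closed-form MLE is 1/mean(y)):
set.seed(1)
d.exp <- rexp(1000, rate = 2) # sample with true rate theta = 2
# Exponential log-likelihood: n*log(theta) - theta*sum(y)
log_like_exp <- function(theta, y) length(y) * log(theta) - theta * sum(y)
# One-dimensional problem, so optimise() is enough
fit <- optimise(log_like_exp, y = d.exp, interval = c(0.001, 100), maximum = TRUE)
fit$maximum # should be close to 1 / mean(d.exp), i.e. about 2
1 / mean(d.exp) # closed-form MLE for comparison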
# Example for exercise 3, this time with a Poisson distribution
# Generate a sample
d.poiss <- rpois(1000, 0.8)
# Function that computes the log-likelihood
# (lfactorial avoids the overflow of log(factorial(y)) for large y)
log_like_poisson <- function(mu, y) {
sum(y * log(mu) - mu - lfactorial(y))
}
# Maximise the log-likelihood: optim() minimises by default,
# so we pass the negative log-likelihood
optim_result <- optim(1, function(mu) -log_like_poisson(mu, d.poiss), method = "BFGS")
# Display the result
cat("Maximum of the log-likelihood:", -optim_result$value, "\n")
cat("Estimated parameter:", optim_result$par, "\n")
# The estimate should be close to the closed-form MLE mean(d.poiss), i.e. about 0.8
# Check using optimise(), maximising the log-likelihood directly
optimise_result <- optimise(log_like_poisson, y = d.poiss, interval = c(0.001, 100), maximum = TRUE)
# Display the result
cat("Maximum of the log-likelihood (check):", optimise_result$objective, "\n")
cat("Estimated parameter (check):", optimise_result$maximum, "\n")
# Example with Gaussian distribution
# Generate a sample
d.gaussian <- rnorm(1000, mean = 2, sd = 1)
# Function that computes the negative log-likelihood (optim() minimises by default)
log_like_gaussian <- function(params, y) {
mean <- params[1]
sd <- params[2]
-sum(dnorm(y, mean = mean, sd = sd, log = TRUE))
}
# Minimise the negative log-likelihood (BFGS may try sd <= 0 along the way,
# which produces the NaN warnings below)
optim_result <- optim(c(0, 1), log_like_gaussian, y = d.gaussian, method = "BFGS")
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
## Warning in dnorm(y, mean = mean, sd = sd, log = TRUE): NaNs produced
# Display the result
cat("Maximum of the log-likelihood:", -optim_result$value, "\n")
## Maximum of the log-likelihood: -1447.227
cat("Estimated parameters:", optim_result$par, "\n")
## Estimated parameters: 2.046812 1.028693
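A sketch of a common fix for those warnings: reparameterise with log(sd) so the optimiser works on an unconstrained scale and sd stays positive:
# Optimise over (mean, log(sd)); sd = exp(log_sd) is always positive
log_like_gaussian2 <- function(params, y) {
sd <- exp(params[2])
-sum(dnorm(y, mean = params[1], sd = sd, log = TRUE))
}
optim_result2 <- optim(c(0, 0), log_like_gaussian2, y = d.gaussian, method = "BFGS")
cat("Estimated mean:", optim_result2$par[1], "estimated sd:", exp(optim_result2$par[2]), "\n")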