# Set your working directory
### REPLACE WITH YOUR FILE PATH
setwd('/home/nicole/Documents/Teaching/Class_Documents/Lecture2')
# WINDOWS: setwd("c:/Documents/my/working/directory")
# MAC: setwd("/Users/yourname/working/directory")

Read in Excel File excel_CAC40.xlsx, sheet

First, make sure you have installed and loaded the required libraries

library("readxl")
library("data.table")
library("tidyverse")
library("zoo")
library("reshape2")

# recall the install function: example -
# install.packages("readxl")

We want the file “excel_CAC40.xlsx”, and the sheet “Webstat_Export_20200124”. We know there is a header in the first 4 lines, so we skip these lines on the import.

df <- read_excel("excel_CAC40.xlsx", sheet = "Webstat_Export_20200124", skip = 4)
## New names:
## * `` -> ...1
## * `` -> ...6
# df <- read_excel("excel_CAC40.xlsx", sheet = 1, skip = 4) # we can also import by sheet order

# read in as a tibble object
df <- as.data.frame(df) # convert to data frame object

dim(df) # get dimensions of data
## [1] 96 16
head(df) # view first 5 rows of data

We see that “…1” and “…6” are the same item, so we remove one. We don’t like some these variable names, so we change them using better naming conventions.

df <- df[,!(names(df) %in% "...6")]
head(df)
names(df) # view names of columns
##  [1] "...1"                          "flow"                         
##  [3] "ocv_norev"                     "stock"                        
##  [5] "ocv_rev"                       "g.r. CAC 40"                  
##  [7] "g.r. CAC 40 (macrotrends)"     "CAC 40"                       
##  [9] "CAC 40 2014 =100"              "CAC40 2015 =100 (macrotrends)"
## [11] "verif 1"                       "verif 2"                      
## [13] "check"                         "inflation rate"               
## [15] "fin profitability (real)"
# we don't like some these variable names, so we change them using better naming conventions
setnames(df,old=c("...1",
                 "g.r. CAC 40",
                 "g.r. CAC 40 (macrotrends)",
                 "CAC 40",
                 "CAC 40 2014 =100",
                 "CAC40 2015 =100 (macrotrends)",
                 "verif 1",
                 "verif 2",
                 "inflation rate",
                 "fin profitability (real)"),
         new=c("qtr",
              "gr.CAC40",
              "gr.CAC40.mt",
              "CAC40",
              "CAC40.2014.100",
              "CAC40.2015.100.mt",
              "verif.1",
              "verif.2",
              "inflation.rate",
              "fin.profitability.real"))

names(df) # much better!
##  [1] "qtr"                    "flow"                  
##  [3] "ocv_norev"              "stock"                 
##  [5] "ocv_rev"                "gr.CAC40"              
##  [7] "gr.CAC40.mt"            "CAC40"                 
##  [9] "CAC40.2014.100"         "CAC40.2015.100.mt"     
## [11] "verif.1"                "verif.2"               
## [13] "check"                  "inflation.rate"        
## [15] "fin.profitability.real"

The variable qtr is coded as a character. Let’s convert it to a year-quarter data-time type.

str(df) # view the data types
## 'data.frame':    96 obs. of  15 variables:
##  $ qtr                   : chr  "1995Q4" "1996Q1" "1996Q2" "1996Q3" ...
##  $ flow                  : num  0 1523 -2581 2136 -1740 ...
##  $ ocv_norev             : num  0 -1414 2183 -383 -2399 ...
##  $ stock                 : num  225961 263342 269122 262088 278622 ...
##  $ ocv_rev               : num  NA 37272 6178 -8787 20673 ...
##  $ gr.CAC40              : num  NA 16.49 2.35 -3.27 7.89 ...
##  $ gr.CAC40.mt           : num  NA 9.231 3.859 0.428 8.576 ...
##  $ CAC40                 : num  1 1.16 1.19 1.15 1.24 ...
##  $ CAC40.2014.100        : num  0.327 0.381 0.39 0.378 0.407 ...
##  $ CAC40.2015.100.mt     : num  0.432 0.472 0.49 0.492 0.534 ...
##  $ verif.1               : num  NA 15.27 2.32 -3.32 7.59 ...
##  $ verif.2               : num  NA 16.49 2.35 -3.27 7.89 ...
##  $ check                 : num  NA 0.0 2.8e-14 0.0 0.0 ...
##  $ inflation.rate        : num  1.216 1.699 1.476 1.218 0.951 ...
##  $ fin.profitability.real: num  NA 14.8 0.87 -4.48 6.94 ...
df$qtr <- as.Date(as.yearqtr(df$qtr, format = "%YQ%q"))
str(df) # now coded as a date object
## 'data.frame':    96 obs. of  15 variables:
##  $ qtr                   : Date, format: "1995-10-01" "1996-01-01" ...
##  $ flow                  : num  0 1523 -2581 2136 -1740 ...
##  $ ocv_norev             : num  0 -1414 2183 -383 -2399 ...
##  $ stock                 : num  225961 263342 269122 262088 278622 ...
##  $ ocv_rev               : num  NA 37272 6178 -8787 20673 ...
##  $ gr.CAC40              : num  NA 16.49 2.35 -3.27 7.89 ...
##  $ gr.CAC40.mt           : num  NA 9.231 3.859 0.428 8.576 ...
##  $ CAC40                 : num  1 1.16 1.19 1.15 1.24 ...
##  $ CAC40.2014.100        : num  0.327 0.381 0.39 0.378 0.407 ...
##  $ CAC40.2015.100.mt     : num  0.432 0.472 0.49 0.492 0.534 ...
##  $ verif.1               : num  NA 15.27 2.32 -3.32 7.59 ...
##  $ verif.2               : num  NA 16.49 2.35 -3.27 7.89 ...
##  $ check                 : num  NA 0.0 2.8e-14 0.0 0.0 ...
##  $ inflation.rate        : num  1.216 1.699 1.476 1.218 0.951 ...
##  $ fin.profitability.real: num  NA 14.8 0.87 -4.48 6.94 ...
summary(df) # view summary statistics of each variable
##       qtr                  flow            ocv_norev       
##  Min.   :1995-10-01   Min.   :-15754.0   Min.   :-33243.0  
##  1st Qu.:2001-09-08   1st Qu.: -3103.2   1st Qu.: -2459.0  
##  Median :2007-08-16   Median :  -260.0   Median :    -0.5  
##  Mean   :2007-08-16   Mean   :   623.4   Mean   :  2358.0  
##  3rd Qu.:2013-07-24   3rd Qu.:  4147.2   3rd Qu.:  3086.2  
##  Max.   :2019-07-01   Max.   : 23633.0   Max.   : 42504.0  
##                                                            
##      stock            ocv_rev           gr.CAC40        gr.CAC40.mt     
##  Min.   : 225961   Min.   :-159357   Min.   :-20.668   Min.   :-28.747  
##  1st Qu.: 507720   1st Qu.: -23170   1st Qu.: -3.268   1st Qu.: -2.763  
##  Median : 667843   Median :  15589   Median :  2.499   Median :  2.553  
##  Mean   : 677603   Mean   :   8025   Mean   :  1.941   Mean   :  1.734  
##  3rd Qu.: 805964   3rd Qu.:  42044   3rd Qu.:  7.557   3rd Qu.:  7.528  
##  Max.   :1274514   Max.   : 143049   Max.   : 26.070   Max.   : 29.771  
##                    NA's   :1         NA's   :1         NA's   :1        
##      CAC40       CAC40.2014.100   CAC40.2015.100.mt    verif.1       
##  Min.   :1.000   Min.   :0.3274   Min.   :0.4320    Min.   :-23.153  
##  1st Qu.:1.929   1st Qu.:0.6314   1st Qu.:0.8143    1st Qu.: -3.322  
##  Median :2.472   Median :0.8093   Median :0.9729    Median :  2.469  
##  Mean   :2.508   Mean   :0.8211   Mean   :0.9732    Mean   :  1.488  
##  3rd Qu.:3.068   3rd Qu.:1.0044   3rd Qu.:1.1665    3rd Qu.:  7.285  
##  Max.   :4.111   Max.   :1.3458   Max.   :1.4875    Max.   : 23.166  
##                                                     NA's   :1        
##     verif.2            check   inflation.rate    fin.profitability.real
##  Min.   :-20.668   Min.   :0   Min.   :-0.3356   Min.   :-22.668       
##  1st Qu.: -3.268   1st Qu.:0   1st Qu.: 0.7317   1st Qu.: -4.553       
##  Median :  2.499   Median :0   Median : 1.2169   Median :  1.423       
##  Mean   :  1.941   Mean   :0   Mean   : 1.2612   Mean   :  0.679       
##  3rd Qu.:  7.557   3rd Qu.:0   3rd Qu.: 1.9237   3rd Qu.:  5.529       
##  Max.   : 26.070   Max.   :0   Max.   : 2.7726   Max.   : 24.733       
##  NA's   :1         NA's   :1                     NA's   :1

It appears we have some NA’s, which we know are in the first row due to calculatin growth rates. For now we leave it since we will re-calcualte the variables using R here.

Calculate Variables

# 1. ocv_rev
df$ocv_rev.r <- df$stock - lag(df$stock) - df$ocv_norev - df$flow
summary(df$ocv_rev)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -159357  -23170   15589    8025   42044  143049       1
summary(df$ocv_rev.r) # same values
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -159357  -23170   15589    8025   42044  143049       1
# 2. gr.CAC40
df$gr.CAC40.r <- df$ocv_rev.r*100/lag(df$stock)

# 3. gr.CAC40.mt
df$gr.CAC40.mt.r <- (df$CAC40.2015.100.mt-lag(df$CAC40.2015.100.mt))*100/lag(df$CAC40.2015.100.mt)

# 4. CAC40
df$CAC40.r.h <- (1+df$gr.CAC40.r/100) # first we create a 'helper' column that has the growth rate
# this makes things a bit easier to code

df$CAC40.r.h[is.na(df$CAC40.r.h)] <-1 # we set the 1st row growth rate = 1

df$CAC40.r <- 1 # we create a holder column initialized to 1
for(i in 2:nrow(df)){
  # for each row in the column CAC40.r multiple the previous row by the 'helper' column
  # we index each row by i
  df[i,]$CAC40.r <- df[i-1,]$CAC40.r*df[i,]$CAC40.r.h
}


# 5. CAC40.2014.100
df$index <- mean(df[year(df$qtr) %in% "2014",]$CAC40.r)
df$CAC40.2014.100.r <- df$CAC40.r / df$index

# 6. fin.profitability.real
df$fin.profitability.real.r <- df$gr.CAC40.r-df$inflation.rate

Why can’t we just use lead() and lag() for CAC40.r? Calculations are done simultaneously, and not progressively. For the CAC40.r, each cell needs to be calculated before it can be used in the next cell calculation, thus we need to do each calculation one at a time.

head(df)
df <- df[-1,] # remove first row

Plotting

Remember that for ease of use, we melt the data

m_df <- melt(df,id.vars="qtr")
head(m_df)

1. Real v Nominal

ggplot(m_df[m_df$variable %in% c('fin.profitability.real.r','gr.CAC40.r'),],
       aes(x=qtr,y=value,fill=variable,color=variable))+
  geom_line(alpha=.7) +
  theme_bw() +
  xlab("Quarter") +
  ylab("") +
  theme(legend.title = element_blank(),
        legend.position = "bottom") +
  scale_color_manual(values=c("Navy","cadetblue"),
                      labels=c("Nominal","Real"))

2. CAC 40, growth rate

ggplot(m_df[m_df$variable %in% c('gr.CAC40.mt.r','gr.CAC40.r'),],
       aes(x=qtr,y=value,fill=variable,color=variable))+
  geom_line(alpha=.7) +
  theme_bw() +
  xlab("Quarter") +
  ylab("") +
  theme(legend.title = element_blank(),
        legend.position = "bottom") +
  scale_color_manual(values=c("Navy","cadetblue"),
                     labels=c("CAC 40 Macro Trends \n Growth Rate",
                              "CAC 40 Growth Rate"))

3. CAC 40

ggplot(m_df[m_df$variable %in% c('CAC40.2015.100.mt','CAC40.2014.100.r'),],
       aes(x=qtr,y=value,fill=variable,color=variable))+
  geom_line(alpha=.7) +
  theme_bw() +
  xlab("Quarter") +
  ylab("") +
  theme(legend.title = element_blank(),
        legend.position = "bottom") +
  scale_color_manual(values=c("Navy","cadetblue"),
                     labels=c("CAC 40 Macro Trends (2015=1)",
                              "CAC 40 (2014=1)"))