TimeSeries in R

1. TimeSeries plotting example with multiple stocks

  • download MonthlyCloses.csv.This file contains 240 rows or two decades worth of MonthlyClosing prices for some top performing stocks in the Indian stock markets taken from CMIE (Center for Monitoring Indian Economy).When reading in data, it is best to consider converting it into CSV file(s) as it is one of the most flexible data formats, which can be read by most statistical platforms.

  • demo(graphics)in RStudio gives us a glimpse into the wide variety of plots that R can create.

# Date variables can be hard, so we convert them
# into something more useful
# Install a package called lubridate via the
# Packages tab -  should take very little time
library(lubridate)

# Next, import the data from a file. 

# If you encounter an error message, 
# then you must set the Working Directory.
# You could use the Files tab to navigate 
# to the file MonthlyCloses.csv
# and select More > Set working directory

# Read the closing prices into a variable
MonthlyCloses <- read.csv("MonthlyCloses.csv", 
                     header=TRUE)

str(MonthlyCloses)
## 'data.frame':    240 obs. of  14 variables:
##  $ Month         : Factor w/ 240 levels "Apr-00","Apr-01",..: 96 76 156 16 176 136 116 36 236 216 ...
##  $ Pharma1       : num  17.6 16 16.7 13.7 13.1 ...
##  $ Pharma2       : num  91.2 87.5 85 78.8 76.2 ...
##  $ Aluminum      : num  58.1 54.4 50.7 44.4 47.8 ...
##  $ FMCG          : num  60.5 57.5 56 55.5 58.4 58.6 59.8 59 63 63.6 ...
##  $ Housing       : num  22.8 21 20 20 21.6 ...
##  $ Software1     : num  3.36 3.52 3.71 3.52 3.91 3.7 3.48 3.73 3.62 3.48 ...
##  $ Infrastructure: num  76.2 76.7 86.7 83.3 89.3 ...
##  $ Automobile    : num  52.5 48.8 42.8 45 48 ...
##  $ Refineries    : num  68.8 66.9 65.3 62.2 67.9 ...
##  $ Bank          : num  17.3 16.6 16.6 16 17.9 ...
##  $ Power         : num  13.6 12.4 11.9 11 12.1 ...
##  $ Steel         : num  138 133 126 117 133 ...
##  $ Software2     : num  2.58 2.5 2.49 2.47 2.7 2.67 3.12 2.9 3.07 2.88 ...
head(MonthlyCloses)
##    Month Pharma1 Pharma2 Aluminum FMCG Housing Software1 Infrastructure
## 1 Jan-95   17.65   91.25    58.08 60.5   22.75      3.36          76.25
## 2 Feb-95   16.00   87.50    54.38 57.5   21.00      3.52          76.67
## 3 Mar-95   16.67   85.00    50.67 56.0   20.00      3.71          86.67
## 4 Apr-95   13.73   78.75    44.41 55.5   20.00      3.52          83.33
## 5 May-95   13.07   76.25    47.83 58.4   21.60      3.91          89.33
## 6 Jun-95   11.07   65.00    43.84 58.6   23.85      3.70          87.50
##   Automobile Refineries  Bank Power  Steel Software2
## 1      52.50      68.75 17.34 13.65 138.46      2.58
## 2      48.75      66.88 16.63 12.45 133.31      2.50
## 3      42.75      65.31 16.61 11.92 125.94      2.49
## 4      45.00      62.19 16.04 11.00 117.10      2.47
## 5      48.00      67.88 17.88 12.06 132.87      2.70
## 6      48.75      66.00 17.74 11.39 132.28      2.67
# Dimensions of the dataset
numDays <- nrow(MonthlyCloses)
numStocks <- ncol(MonthlyCloses) - 1

# Because the data is in mixed format - the 
# first column is a Date and the rest are
# Prices, we make sure that R knows about
# their intended formats

# Explicitly declare the formats of the columns
# Month field is alpha
MonthlyCloses[ , 1] <- as.character(MonthlyCloses[ , 1])

# Prices are numeric
for(i in 2:numStocks + 1) {
  MonthlyCloses[ , i] <- as.numeric(MonthlyCloses[ , i])
}

# Prepare the Month field, so it can be converted
# to a Date field. You don't have to understand
# everything here - all that the command is 
# achieving is that the Date column is set to
# a format recognisable by R
MonthlyCloses$Month <- sapply(MonthlyCloses$Month,
                            function(month) 
                              paste("01-",month, sep=""))

MonthlyCloses$Month <- as.Date(MonthlyCloses$Month,
                             "%d-%b-%y")

head(MonthlyCloses)
##        Month Pharma1 Pharma2 Aluminum FMCG Housing Software1
## 1 1995-01-01   17.65   91.25    58.08 60.5   22.75      3.36
## 2 1995-02-01   16.00   87.50    54.38 57.5   21.00      3.52
## 3 1995-03-01   16.67   85.00    50.67 56.0   20.00      3.71
## 4 1995-04-01   13.73   78.75    44.41 55.5   20.00      3.52
## 5 1995-05-01   13.07   76.25    47.83 58.4   21.60      3.91
## 6 1995-06-01   11.07   65.00    43.84 58.6   23.85      3.70
##   Infrastructure Automobile Refineries  Bank Power  Steel Software2
## 1          76.25      52.50      68.75 17.34 13.65 138.46      2.58
## 2          76.67      48.75      66.88 16.63 12.45 133.31      2.50
## 3          86.67      42.75      65.31 16.61 11.92 125.94      2.49
## 4          83.33      45.00      62.19 16.04 11.00 117.10      2.47
## 5          89.33      48.00      67.88 17.88 12.06 132.87      2.70
## 6          87.50      48.75      66.00 17.74 11.39 132.28      2.67
# Let's attach a TradingMonth column to our dataset
MonthlyCloses$TradingMonth <- months(MonthlyCloses$Month)

# We're ready to explore some plotting routines 
with(MonthlyCloses,
  plot(Software1, Software2,
       main="Monthly Closes for two stocks in the Software Sector",
       xlab="Software1", ylab="Software2",
       pch=20, cex=0.7, col="darkgreen")
)

# Obtain month-wise averages by grouping closed
# across the years
monthlyAverage <- aggregate(FMCG ~ TradingMonth,
                            MonthlyCloses,
                            mean)

# Place two plots side by side
par(mfrow=c(1,2))

# This gives us a raw barplot
barplot(monthlyAverage$FMCG,
        names.arg=monthlyAverage$TradingMonth,
        main="Month-wise Average for FMCG",
        col=colors()[30:41])

# Here's a pie chart for the same
pie(monthlyAverage$FMCG,
    labels=monthlyAverage$TradingMonth,
    main="Month-wise Average for FMCG",
    col=colors()[30:41])

# A task you could undertake is to sort these
# columns by the Month name

#lets check the class of monthlyAverage$TradingMonth
class(monthlyAverage$TradingMonth)
## [1] "character"
#for sorting we need to convert them to factor so that we get ordered factor levels
monthlyAverage$TradingMonth<-as.factor(monthlyAverage$TradingMonth)

#check again the class of monthlyAverage$TradingMonth
class(monthlyAverage$TradingMonth)
## [1] "factor"
levels(monthlyAverage$TradingMonth)
##  [1] "April"     "August"    "December"  "February"  "January"  
##  [6] "July"      "June"      "March"     "May"       "November" 
## [11] "October"   "September"
monthlyAverage$TradingMonth<-relevel(monthlyAverage$TradingMonth,"January")
#This makes "January" as the first level 

#Instead of doing one month each time, we can reorder at the time of creating the factor
monthlyAverage$TradingMonth<-factor(monthlyAverage$TradingMonth,
                levels=c("January","February","March","April",
                "May","June","July","August","September",
                "October","November","December"),ordered=TRUE)

#check again the class of monthlyAverage$TradingMonth
levels(monthlyAverage$TradingMonth)
##  [1] "January"   "February"  "March"     "April"     "May"      
##  [6] "June"      "July"      "August"    "September" "October"  
## [11] "November"  "December"
#now lets replot with correctly ordered months
# Place two plots side by side
par(mfrow=c(1,2))

# This gives us a raw barplot
barplot(monthlyAverage$FMCG,
        names.arg=sort(monthlyAverage$TradingMonth),
        main="Month-wise Average for FMCG",
        col=colors()[30:41])

# Here's a pie chart for the same
pie(monthlyAverage$FMCG,
    labels=sort(monthlyAverage$TradingMonth,decreasing = TRUE),
    main="Month-wise Average for FMCG",
    col=colors()[30:41])

# # Plot the time series for each stock
for(symbol in 2:numStocks + 1) {
  
  # The ts function of R helps us to
  # construct a time series
  plot(ts(MonthlyCloses[ , symbol],
          start=c(1995, 1), end=c(2014, 12),
          frequency=12),
       main=paste("Monthly Movements for:", 
                  colnames(MonthlyCloses)[symbol]),
       xlab="Year", ylab="Stock Price in INR",
       col="navy")
  
}

Lets visualise two related numerical variables(softeare 1 & software 2) using scatterplots and line charts placed side by side:

par(mfrow=c(1,2))

with(MonthlyCloses,
  plot(Software1, Software2,
       main="Monthly Closes for two stocks \n in the Software Sector",
       xlab="Software1", ylab="Software2",
       pch=20, cex=0.7, col="darkgreen")
)

#Now get aline chart for both 
#Determine the range for Y axis

maxVal<-max(max(MonthlyCloses$Software1),max(MonthlyCloses$Software2))
minVal<-min(min(MonthlyCloses$Software1),min(MonthlyCloses$Software2))

plot(MonthlyCloses$Software1~MonthlyCloses$Month,
     type="l",
     lwd=2,
     col="navy",
     xlab="Time", ylab="Monthly Closing price",
     main="Two software stocks",
     ylim=c(0,maxVal))

par(new=TRUE)

plot(MonthlyCloses$Software2~MonthlyCloses$Month,
     type="l",
     lwd=2,
     ann=FALSE,
     col="brown",
     ylim=c(0,maxVal))


2. TimeSeries example with manipulate package usage with four stock prices

Stocks<-read.csv("FourStocks.csv",header = TRUE)
#View(Stocks) # to see the dataset

str(Stocks)
## 'data.frame':    4945 obs. of  5 variables:
##  $ Date  : Factor w/ 4945 levels "1-Apr-02","1-Apr-03",..: 1847 3605 4021 4186 4353 4844 217 382 547 714 ...
##  $ Pharma: num  98.8 101.2 101.2 98.8 95 ...
##  $ FMCG  : num  59 58.5 58 57 56.5 57 55.5 56 56.5 57 ...
##  $ Power : num  13.6 13.3 13.3 13 12.8 ...
##  $ Steel : num  151 150 149 145 141 ...
head(Stocks)
##       Date Pharma FMCG Power  Steel
## 1 2-Jan-95  98.75 59.0 13.61 150.98
## 2 3-Jan-95 101.25 58.5 13.27 150.25
## 3 4-Jan-95 101.25 58.0 13.27 148.77
## 4 5-Jan-95  98.75 57.0 13.03 145.09
## 5 6-Jan-95  95.00 56.5 12.79 141.41
## 6 9-Jan-95  97.50 57.0 12.55 138.46
colMeans(Stocks[ , 2:5]) #getting mean column wise
##    Pharma      FMCG     Power     Steel 
## 774.45014 248.38014  51.05487 287.19741
#or 

sapply(Stocks[ , 2:5],mean)
##    Pharma      FMCG     Power     Steel 
## 774.45014 248.38014  51.05487 287.19741
#Q:Which stock has the lowest mean value across the two decades?
which.min(colMeans(Stocks[ , 2:5]))
## Power 
##     3
#Ans=power

#We can calculate the standard deviations of the 4 columns representing the stocks using the apply family functions

apply(Stocks[ , 2:5], MARGIN=2, FUN=sd)
##    Pharma      FMCG     Power     Steel 
## 749.60569 150.69119  43.64287 212.03147
#or

sapply(Stocks[ , 2:5],sd)
##    Pharma      FMCG     Power     Steel 
## 749.60569 150.69119  43.64287 212.03147
#Q:Which stock has the highest volatility, as measured by the standard deviation?
which.max(sapply(Stocks[ , 2:5],sd))
## Pharma 
##      1
###################################

SKEWNESS

library(moments)

skewness(Stocks[ , 2:5]) #skewness of all 4 stocks
##    Pharma      FMCG     Power     Steel 
## 1.4052686 1.5324310 0.5690035 0.6753831
#Q:The prices of which stock have the maximum skew?
which.max(skewness(Stocks[ , 2:5]))
## FMCG 
##    2

This question tests your visual interpretation of skews.

skew

Q:Assume that the histograms correspond to Steel and FMCG. What is the diagram on the left? Ans:Histogram of Steel

Manipulating Dates in R using lubridate package

# Install the lubridate package from the 
# Packages pane if you have not already done so
library("lubridate")
# Format the Date field based on what is 
# contained in the Date column
Stocks$Date <- as.Date(Stocks$Date, "%d-%b-%y")

# Capture the month and year data in different columns. 
# The "accessor" functions such as 'month()' & 'year()'are courtesy the lubridate library!
Stocks$Month <- month(Stocks$Date)
Stocks$Year <- year(Stocks$Date)

# View the newly formatted data
#View(Stocks)

How does this change things? Source your script and have a look at the partial screen captures below:

Before Formatting

date

After Formatting

date1

  • It is common practice to export the results of a computation in R “back” to spreadsheets. Add these lines to your script, and Source it. You should see a new file called NewStocks.csv in your working directory.
write.csv(Stocks, "NewStocks.csv")
cat("Created NewStocks.csv in",
    getwd(), "\n",
    "You may open it as a spreadsheet!\n")
## Created NewStocks.csv in J:/Elearning/courseera videos/edX_Statistics for Business - II_IIMBx QM101.2x/rstudio files 
##  You may open it as a spreadsheet!

Here’s how we define our random variable. The sample space is the set of 4,945 observations in the dataset, which stand for the prices of stock over two decade. Our RV maps each outcome (stock price) in the sample space to the average stock price for the corresponding month. Therefore our RV takes only 240 values for the 20 years under observation, rather than the potentially 4,945 individual values.

In the new xlsx spreadsheet that you just created, build a Pivot table to calculate the average Pharma stock prices by month and year. Your table should look like this - in truncated form.

pivot

The same table, albeit in a different form, can be constructed using R. Here’s code that you will add to your script.

# Here's an easy way to construct a table
# of average values by month
monthYearAvePharma <- aggregate(Pharma ~ Month + Year,
                                data=Stocks,
                                FUN=mean)
#The advantage with R is that the FUN argument can be whatever you want it to be.
head(monthYearAvePharma)
##   Month Year   Pharma
## 1     1 1995 95.35714
## 2     2 1995 87.89474
## 3     3 1995 86.64526
## 4     4 1995 84.02375
## 5     5 1995 76.99429
## 6     6 1995 72.42045

APPROXIMATION

In our next step, we construct the average tables by year and month for the remaining stocks, namely FMCG, Power and Steel. This can be done via an easy cut-and-paste in your script - make sure to change the variable names accordingly!

Using the View command, you can examine the contents of these aggregate variables. Now proceed to answer these questions.

monthYearAveFMCG <- aggregate(FMCG ~ Month + Year,
                                data=Stocks,
                                FUN=mean)
monthYearAvePower <- aggregate(Power ~ Month + Year,
                                data=Stocks,
                                FUN=mean)
monthYearAveSteel <- aggregate(Steel ~ Month + Year,
                                data=Stocks,
                                FUN=mean)

head(monthYearAveFMCG)
##   Month Year     FMCG
## 1     1 1995 58.21429
## 2     2 1995 58.02632
## 3     3 1995 56.94737
## 4     4 1995 56.68750
## 5     5 1995 57.15714
## 6     6 1995 59.05455
head(monthYearAvePower)
##   Month Year    Power
## 1     1 1995 13.05524
## 2     2 1995 12.81895
## 3     3 1995 12.37947
## 4     4 1995 11.81000
## 5     5 1995 11.13048
## 6     6 1995 12.44409
head(monthYearAveSteel)
##   Month Year    Steel
## 1     1 1995 139.4443
## 2     2 1995 131.9126
## 3     3 1995 134.0053
## 4     4 1995 131.3275
## 5     5 1995 120.0924
## 6     6 1995 136.4273
#monthYearAve<-aggregate(Stocks[,2:5]~ Month + Year,                                data=Stocks,FUN=mean)

Q:What was the average price of Steel stock for December 1995?

monthYearAveSteel$Steel[monthYearAveSteel$Month==12 & monthYearAveSteel$Year==1995]
## [1] 118.1025

Ans:118.1025

Q:What was the minimum average price of Steel stock across the months?

min(monthYearAveSteel$Steel)
## [1] 44.24286

Ans:44.24286

Q:What was the actual average price of Steel stock over the 4,945 trading days?

mean(Stocks$Steel)
## [1] 287.1974

Ans:287.1974

Q:Assume that there are equal number of trading days in a month. What is the expectation of the random variable that maps a day’s closing price of Steel stock to the average price for the month?

mean(monthYearAveSteel$Steel)
## [1] 285.7848

Ans:285.7848


One important way to regard data in financial markets is as time series. R has tremendous support for time series data. Here is code to declare the actual vs. aggregate time series in your R script and plot it.

# Let's plot and compare the actual vs aggregated prices
# the Pharma stock
par(mfrow=c(1,2))
ts.Pharma <- ts(Stocks$Pharma)
plot(ts.Pharma,
     main="Actual Prices of Pharma Stock",
     col="navy", 
     ylab="Pharma (actual)")

ts.PharmaAvg <- ts(monthYearAvePharma$Pharma,
                   start=c(1995, 1), end=c(2014, 12),
                   frequency=12)
plot(ts.PharmaAvg,
     main="Aggregate Prices of Pharma Stock",
     col="darkred", 
     ylab="Pharma (aggregate)")

The averaging by the random variable has had a “smoothing” effect on the plot.

create time series for the remaining stocks:

# Let's plot and compare the actual vs aggregated prices
par(mfrow=c(1,2))
ts.FMCG <- ts(Stocks$FMCG)
plot(ts.FMCG,
     main="Actual Prices of FMCG Stock",
     col="navy", 
     ylab="FMCG (actual)")

ts.FMCGAvg <- ts(monthYearAveFMCG$FMCG,
                   start=c(1995, 1), end=c(2014, 12),
                   frequency=12)
plot(ts.FMCGAvg,
     main="Aggregate Prices of FMCG Stock",
     col="darkred", 
     ylab="FMCG (aggregate)")

# Let's plot and compare the actual vs aggregated prices
par(mfrow=c(1,2))
ts.Power<- ts(Stocks$Power)
plot(ts.Power,
     main="Actual Prices of Power Stock",
     col="navy", 
     ylab="Power(actual)")

ts.PowerAvg <- ts(monthYearAvePower$Power,
                   start=c(1995, 1), end=c(2014, 12),
                   frequency=12)
plot(ts.PowerAvg,
     main="Aggregate Prices of Power Stock",
     col="darkred", 
     ylab="Power (aggregate)")

# Let's plot and compare the actual vs aggregated prices
par(mfrow=c(1,2))
ts.Steel<- ts(Stocks$Steel)
plot(ts.Steel,
     main="Actual Prices of Steel Stock",
     col="navy", 
     ylab="Steel(actual)")

ts.SteelAvg <- ts(monthYearAveSteel$Steel,
                   start=c(1995, 1), end=c(2014, 12),
                   frequency=12)
plot(ts.SteelAvg,
     main="Aggregate Prices of Steel Stock",
     col="darkred", 
     ylab="Steel(aggregate)")

create a script based on all of our earlier instructions and save it as FourStocks.R

# This is a library that comes shipped with RStudio, 
# and facilitates interactive plotting
# 
# NOTE: Please don't attempt to install this!
library(manipulate)

# Source what you have created earlier
# Make sure all of your variables are declared
source("FourStocks.R")
## 'data.frame':    4945 obs. of  5 variables:
##  $ Date  : Factor w/ 4945 levels "1-Apr-02","1-Apr-03",..: 1847 3605 4021 4186 4353 4844 217 382 547 714 ...
##  $ Pharma: num  98.8 101.2 101.2 98.8 95 ...
##  $ FMCG  : num  59 58.5 58 57 56.5 57 55.5 56 56.5 57 ...
##  $ Power : num  13.6 13.3 13.3 13 12.8 ...
##  $ Steel : num  151 150 149 145 141 ...

# If you encounter any errors, this is due to
# some of the steps not being executed correctly!

# Reset the plotting arrangement
par(mfrow=c(1,1))

# Function to plot the time series of all sectors
# from the indicated start year to the end year
windowAll <- function(startYear, endYear) {
  
  ts.plot(window(ts.PharmaAvg,
                 start=c(startYear, 1),
                 end=c(endYear, 12)),
          window(ts.FMCGAvg,
                 start=c(startYear, 1),
                 end=c(endYear, 12)),
          window(ts.PowerAvg,
                 start=c(startYear, 1),
                 end=c(endYear, 12)),
          window(ts.SteelAvg,
                 start=c(startYear, 1),
                 end=c(endYear, 12)),
          main=paste("Stocks from", startYear,
                     "through", endYear),
          gpars=list(xlab="Year",
                     ylab="Stock Price",
                     col=c("darkred", "navy",
                           "darkgreen", "darkorange"),
                     lwd=2))
  legend("topleft",
         bty="n",
         c("Pharma", "FMCG", "Power", "Steel"),
         lty=c(1, 1, 1, 1),
         lwd=c(2, 2, 2, 2),
         col=c("darkred", "navy", 
               "darkgreen", "darkorange"))
}

windowAll(2010,2014) #if we want for a period between 2010 to 2014

windowAll(1998,2010)

# Plot a time series of adjusted close prices 
# between lowYr and upYr. Interact with it.
# What do you notice?

#Uncomment the below commands to run the plots interactively using manipulate
# lowYr <- 1995; upYr <- 2014
# manipulate(
#   windowAll(lowYr, upYr),
#   lowYr = slider(1995, 2014,
#                  step=1,
#                  initial=1995),
#   upYr = slider(lowYr, 2014,
#                 step=1,
#                 initial=2014))
devtools::session_info()
## Session info --------------------------------------------------------------
##  setting  value                       
##  version  R version 3.2.2 (2015-08-14)
##  system   x86_64, mingw32             
##  ui       RTerm                       
##  language (EN)                        
##  collate  English_India.1252          
##  tz       Asia/Calcutta               
##  date     2015-10-30
## Packages ------------------------------------------------------------------
##  package    * version date       source        
##  devtools     1.9.1   2015-09-11 CRAN (R 3.2.2)
##  digest       0.6.8   2014-12-31 CRAN (R 3.2.0)
##  evaluate     0.8     2015-09-18 CRAN (R 3.2.2)
##  formatR      1.2.1   2015-09-18 CRAN (R 3.2.2)
##  htmltools    0.2.6   2014-09-08 CRAN (R 3.2.1)
##  knitr        1.11    2015-08-14 CRAN (R 3.2.2)
##  lubridate  * 1.3.3   2013-12-31 CRAN (R 3.2.1)
##  magrittr     1.5     2014-11-22 CRAN (R 3.2.1)
##  manipulate * 1.0.1   2014-12-24 CRAN (R 3.2.0)
##  memoise      0.2.1   2014-04-22 CRAN (R 3.2.0)
##  moments    * 0.14    2015-01-05 CRAN (R 3.2.2)
##  plyr         1.8.3   2015-06-12 CRAN (R 3.2.1)
##  Rcpp         0.12.1  2015-09-10 CRAN (R 3.2.2)
##  rmarkdown    0.8.1   2015-10-10 CRAN (R 3.2.2)
##  stringi      1.0-1   2015-10-22 CRAN (R 3.2.2)
##  stringr      1.0.0   2015-04-30 CRAN (R 3.2.1)
##  yaml         2.1.13  2014-06-12 CRAN (R 3.2.1)