goog.R

steven — Dec 3, 2013, 10:57 PM

# Load and require desired packages
require(quantmod)
Loading required package: quantmod
Loading required package: Defaults
Loading required package: xts
Loading required package: zoo

Attaching package: 'zoo'

The following object is masked from 'package:base':

    as.Date, as.Date.numeric

Loading required package: TTR
Version 0.4-0 included new data defaults. See ?getSymbols.
require(ggplot2)
Loading required package: ggplot2
require(knitr)

# Get data for Google and Microsoft
getSymbols("GOOG")
    As of 0.4-0, 'getSymbols' uses env=parent.frame() and
 auto.assign=TRUE by default.

 This  behavior  will be  phased out in 0.5-0  when the call  will
 default to use auto.assign=FALSE. getOption("getSymbols.env") and 
 getOptions("getSymbols.auto.assign") are now checked for alternate defaults

 This message is shown once per session and may be disabled by setting 
 options("getSymbols.warning4.0"=FALSE). See ?getSymbol for more details
[1] "GOOG"
getSymbols("MSFT")
[1] "MSFT"

# Place the OHLCV data for Google and Microsoft into objects
goog <- GOOG
msft <- MSFT

# Take a quick look at the first rows of each object
head(goog)
           GOOG.Open GOOG.High GOOG.Low GOOG.Close GOOG.Volume
2007-01-03     466.0     476.7    461.1      467.6     7706500
2007-01-04     469.0     483.9    468.4      483.3     7887600
2007-01-05     482.5     487.5    478.1      487.2     6872100
2007-01-08     487.7     489.9    482.2      483.6     4754400
2007-01-09     485.4     488.2    481.2      485.5     5381400
2007-01-10     484.4     493.6    482.0      489.5     5968500
           GOOG.Adjusted
2007-01-03         467.6
2007-01-04         483.3
2007-01-05         487.2
2007-01-08         483.6
2007-01-09         485.5
2007-01-10         489.5
head(msft)
           MSFT.Open MSFT.High MSFT.Low MSFT.Close MSFT.Volume
2007-01-03     29.91     30.25    29.40      29.86    76935100
2007-01-04     29.70     29.97    29.44      29.81    45774500
2007-01-05     29.63     29.75    29.45      29.64    44607200
2007-01-08     29.65     30.10    29.53      29.93    50220200
2007-01-09     30.00     30.18    29.73      29.96    44636600
2007-01-10     29.80     29.89    29.43      29.66    55017400
           MSFT.Adjusted
2007-01-03         25.46
2007-01-04         25.42
2007-01-05         25.27
2007-01-08         25.52
2007-01-09         25.54
2007-01-10         25.29

# Get the summary data for each object
summary(goog)
     Index              GOOG.Open      GOOG.High       GOOG.Low   
 Min.   :2007-01-03   Min.   : 262   Min.   : 269   Min.   : 247  
 1st Qu.:2008-09-24   1st Qu.: 482   1st Qu.: 487   1st Qu.: 477  
 Median :2010-06-18   Median : 560   Median : 567   Median : 551  
 Mean   :2010-06-18   Mean   : 577   Mean   : 582   Mean   : 571  
 3rd Qu.:2012-03-10   3rd Qu.: 630   3rd Qu.: 635   3rd Qu.: 623  
 Max.   :2013-12-03   Max.   :1064   Max.   :1068   Max.   :1060  
   GOOG.Close    GOOG.Volume       GOOG.Adjusted 
 Min.   : 257   Min.   :  840900   Min.   : 257  
 1st Qu.: 482   1st Qu.: 2159550   1st Qu.: 482  
 Median : 559   Median : 3028600   Median : 559  
 Mean   : 576   Mean   : 3770606   Mean   : 576  
 3rd Qu.: 629   3rd Qu.: 4625250   3rd Qu.: 629  
 Max.   :1063   Max.   :23287300   Max.   :1063  
summary(msft)
     Index              MSFT.Open      MSFT.High       MSFT.Low   
 Min.   :2007-01-03   Min.   :15.2   Min.   :15.6   Min.   :14.9  
 1st Qu.:2008-09-24   1st Qu.:25.6   1st Qu.:25.9   1st Qu.:25.3  
 Median :2010-06-18   Median :28.1   Median :28.3   Median :27.8  
 Mean   :2010-06-18   Mean   :27.8   Mean   :28.1   Mean   :27.5  
 3rd Qu.:2012-03-10   3rd Qu.:30.2   3rd Qu.:30.5   3rd Qu.:30.0  
 Max.   :2013-12-03   Max.   :38.1   Max.   :38.8   Max.   :38.1  
   MSFT.Close    MSFT.Volume       MSFT.Adjusted 
 Min.   :15.2   Min.   :1.11e+07   Min.   :13.4  
 1st Qu.:25.6   1st Qu.:4.41e+07   1st Qu.:23.3  
 Median :28.1   Median :5.45e+07   Median :25.3  
 Mean   :27.8   Mean   :6.16e+07   Mean   :25.5  
 3rd Qu.:30.2   3rd Qu.:7.16e+07   3rd Qu.:27.9  
 Max.   :38.5   Max.   :3.19e+08   Max.   :38.5  

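# Get the IQR of each security
# NOTE(review): IQR() is given the whole OHLCV object here, so all price columns
# and the volume column appear to be pooled into a single figure -- consider
# IQR(goog$GOOG.Adjusted) / IQR(msft$MSFT.Adjusted) for a price-only IQR.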
IQR(goog)
[1] 308.8
IQR(msft)
[1] 6.988

# Get the mean, standard deviation and variance of Google and Microsoft
meangadj <- mean(goog$GOOG.Adjusted)
sdgadj <- sd(goog$GOOG.Adjusted)
vargadj <- var(goog$GOOG.Adjusted)
meangadj
[1] 576.5
sdgadj
[1] 149.9
vargadj
              GOOG.Adjusted
GOOG.Adjusted         22473

meanmadj <- mean(msft$MSFT.Adjusted)
sdmadj <- sd(msft$MSFT.Adjusted)
varmadj <- var(msft$MSFT.Adjusted)
meanmadj
[1] 25.48
sdmadj
[1] 4.244
varmadj
              MSFT.Adjusted
MSFT.Adjusted         18.01

# Plot the adjusted close of Google and Microsoft
plot(goog$GOOG.Adjusted)

plot of chunk unnamed-chunk-1

plot(msft$MSFT.Adjusted)

plot of chunk unnamed-chunk-1

chartSeries(goog)

plot of chunk unnamed-chunk-1

chartSeries(msft)

plot of chunk unnamed-chunk-1


# Get the daily returns for Google and Microsoft
# get their corresponding means, standard deviations and variance
dlyrtgoog <- dailyReturn(goog)
mean(dlyrtgoog)
[1] 0.000673
sd(dlyrtgoog)
[1] 0.02035
var(dlyrtgoog)
              daily.returns
daily.returns     0.0004141
dlyrtmsft <- dailyReturn(msft)
mean(dlyrtmsft)
[1] 0.0003229
sd(dlyrtmsft)
[1] 0.01907
var(dlyrtmsft)
              daily.returns
daily.returns     0.0003637

# Plot the daily returns of Google and Microsoft
plot(dlyrtgoog)

plot of chunk unnamed-chunk-1

plot(dlyrtmsft)

plot of chunk unnamed-chunk-1


# Get the log daily returns.
# A simple return r is negative on every down day, where log(r) is NaN, so
# the log return is computed as log(1 + r), which is defined for all r > -1.
logrtgoog <- log(1 + dlyrtgoog)
logrtmsft <- log(1 + dlyrtmsft)

# Plot the log of the daily returns
plot(logrtgoog)

plot of chunk unnamed-chunk-1

plot(logrtmsft)

plot of chunk unnamed-chunk-1


# Get a histogram of the daily returns of Google and Microsoft
hist(dlyrtgoog)

plot of chunk unnamed-chunk-1

hist(dlyrtmsft)

plot of chunk unnamed-chunk-1


# Get a histogram of the log of daily returns of Google and Microsoft
hist(logrtgoog)

plot of chunk unnamed-chunk-1

hist(logrtmsft)

plot of chunk unnamed-chunk-1


# Perform a simple t.test of the mean = 0 for Google and Microsoft Adjusted Close
t.test(goog$GOOG.Adjusted)

    One Sample t-test

data:  goog$GOOG.Adjusted
t = 160.6, df = 1742, p-value < 2.2e-16
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
 569.5 583.6
sample estimates:
mean of x 
    576.5 
t.test(msft$MSFT.Adjusted)

    One Sample t-test

data:  msft$MSFT.Adjusted
t = 250.6, df = 1742, p-value < 2.2e-16
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
 25.28 25.68
sample estimates:
mean of x 
    25.48 

# Perform a paired t.test on the Daily Return objects for GOOG and MSFT
# (two-sided: is the mean of the daily differences different from 0?)
t.test(dlyrtgoog, dlyrtmsft, paired = TRUE, alternative = "two.sided")

    Paired t-test

data:  dlyrtgoog and dlyrtmsft
t = 0.7601, df = 1742, p-value = 0.4473
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.0005532  0.0012533
sample estimates:
mean of the differences 
                0.00035 
# One-sided version: is the mean of the daily differences greater than 0?
t.test(dlyrtgoog, dlyrtmsft, paired = TRUE, alternative = "greater")

    Paired t-test

data:  dlyrtgoog and dlyrtmsft
t = 0.7601, df = 1742, p-value = 0.2236
alternative hypothesis: true difference in means is greater than 0
95 percent confidence interval:
 -0.0004078        Inf
sample estimates:
mean of the differences 
                0.00035 
# One-sided version: is the mean of the daily differences less than 0?
t.test(dlyrtgoog, dlyrtmsft, paired = TRUE, alternative = "less")

    Paired t-test

data:  dlyrtgoog and dlyrtmsft
t = 0.7601, df = 1742, p-value = 0.7764
alternative hypothesis: true difference in means is less than 0
95 percent confidence interval:
     -Inf 0.001108
sample estimates:
mean of the differences 
                0.00035 

# Plotting the histograms of daily returns of Google and Microsoft on the same graph
p1 <- hist(dlyrtgoog)

plot of chunk unnamed-chunk-1

p2 <- hist(dlyrtmsft)

plot of chunk unnamed-chunk-1

# Overlay the two histograms on shared axes. Both limits are derived from the
# two histogram objects so that neither set of bars is clipped: the x range
# spans every bin, the y range reaches the tallest bar of either histogram.
hist_xlim <- range(p1$breaks, p2$breaks)
hist_ylim <- c(0, max(p1$counts, p2$counts))
plot(p1, col = rgb(0, 0, 1, 1 / 4), xlim = hist_xlim, ylim = hist_ylim)
plot(p2, col = rgb(1, 0, 0, 1 / 4), xlim = hist_xlim, ylim = hist_ylim,
     add = TRUE)

plot of chunk unnamed-chunk-1


## Kernel density estimates of the daily returns (nothing is drawn yet)
densgoog <- density(dlyrtgoog)
densmsft <- density(dlyrtmsft)
## Axis limits wide enough for both curves; the y axis always includes 0
xlim <- range(c(densgoog$x, densmsft$x))
ylim <- range(c(0, densgoog$y, densmsft$y))
## Semi-transparent fill colours: red for GOOG, blue for MSFT
googCol <- rgb(red = 1, green = 0, blue = 0, alpha = 0.2)
msftCol <- rgb(red = 0, green = 0, blue = 1, alpha = 0.2)
## Draw the GOOG curve first; this call also sets the title, the axis label,
## the shared limits and the background grid
plot(
  densgoog,
  main = "Distribution of GOOG and MSFT Daily Returns",
  xlab = "Daily Returns",
  xlim = xlim,
  ylim = ylim,
  panel.first = grid()
)
## Fill the area under each density curve
polygon(densgoog, col = googCol, density = -1)
polygon(densmsft, col = msftCol, density = -1)
## Borderless legend in the top-left corner
legend(
  "topleft",
  legend = c("GOOG", "MSFT"),
  fill = c(googCol, msftCol),
  border = NA,
  bty = "n"
)

plot of chunk unnamed-chunk-1


p1 <- hist(logrtgoog)

plot of chunk unnamed-chunk-1

p2 <- hist(logrtmsft)

plot of chunk unnamed-chunk-1

# Overlay the two log-return histograms on shared axes. The limits come from
# the histogram objects themselves, so the plot always frames the bins that
# were actually computed and the taller histogram is never clipped.
hist_xlim <- range(p1$breaks, p2$breaks)
hist_ylim <- c(0, max(p1$counts, p2$counts))
plot(p1, col = rgb(0, 0, 1, 1 / 4), xlim = hist_xlim, ylim = hist_ylim,
     xlab = "Log Returns",
     main = "Histogram of Log Returns for Google and Microsoft")
plot(p2, col = rgb(1, 0, 0, 1 / 4), xlim = hist_xlim, ylim = hist_ylim,
     add = TRUE)

plot of chunk unnamed-chunk-1


# Read in the .csv files and pull out the adjusted close of each security
goog.csv <- read.csv("GOOG.csv")
msft.csv <- read.csv("MSFT.csv")
googadj <- goog.csv$Adj.Close
msftadj <- msft.csv$Adj.Close
# Observation index (1, 2, ..., n) used as the regressor for the trend lines.
# It is derived from the data rather than a hard-coded 1:227, so the lm() fits
# keep working when the CSV files hold a different number of rows.
gx <- seq_along(googadj)
mx <- seq_along(msftadj)

# Get a correlation matrix of Google v Microsoft
cor(googadj, msftadj)
[1] 0.8756

# Linear trend of each adjusted close against its observation index
googlm <- lm(googadj ~ gx)
msftlm <- lm(msftadj ~ mx)

# Google: adjusted close drawn as a line, with the fitted trend line on top
plot(
  googadj,
  type = "l",
  main = "Google Adjusted Close Year 2013 with Trend Line",
  xlab = "Jan 2013 to Nov 2013",
  ylab = "Price"
)
abline(googlm)

plot of chunk unnamed-chunk-1

# Microsoft: adjusted close drawn as a line, with the fitted trend line on top
plot(
  msftadj,
  type = "l",
  main = "Microsoft Adjusted Close Year 2013 with Trend Line",
  xlab = "Jan 2013 to Nov 2013",
  ylab = "Price"
)
abline(msftlm)

plot of chunk unnamed-chunk-1


## Kernel density estimate of the GOOG adjusted close read from the CSV file
densgadj <- density(googadj)
## Axis limits taken from the estimate; the y axis always includes 0
xlim <- range(densgadj$x)
ylim <- range(0, densgadj$y)
## Semi-transparent red fill
googCol <- rgb(1, 0, 0, 0.2)
## Draw the density curve and set up the plot. The values are adjusted close
## prices (not returns), so the x axis is labelled accordingly.
plot(densgadj, xlim = xlim, ylim = ylim, xlab = "Adjusted Close",
     main = "Distribution of GOOG",
     panel.first = grid())
## Fill the area under the curve
polygon(densgadj, density = -1, col = googCol)
## Borderless legend in the top-left corner
legend("topleft", legend = "GOOG",
       fill = googCol, bty = "n",
       border = NA)

plot of chunk unnamed-chunk-1


# ROC curve and AUC for the simple trade signal
# Load the ROCR library
library(ROCR)
Loading required package: gplots
KernSmooth 2.23 loaded
Copyright M. P. Wand 1997-2009

Attaching package: 'gplots'

The following object is masked from 'package:stats':

    lowess
# NOTE(review): this replaces the `goog` price object created earlier in the
# script with the CSV data frame -- confirm nothing later needs the xts version.
goog <- read.csv("GOOG.csv")
# Create the prediction object and the performance object (true positive rate
# against false positive rate) for the ROC curve.
# NOTE(review): assumes GOOG.csv carries `signal` and `FAILURE` columns -- confirm
pred <- prediction(goog$signal, goog$FAILURE)
perf <- performance(pred, "tpr", "fpr")
plot(perf, colorize = TRUE, lwd = 3,
     main = "ROC Curve for Simple Trade Signal", type = "b")

plot of chunk unnamed-chunk-1


AUC <- performance(pred, 'auc')
AUC
An object of class "performance"
Slot "x.name":
[1] "None"

Slot "y.name":
[1] "Area under the ROC curve"

Slot "alpha.name":
[1] "none"

Slot "x.values":
list()

Slot "y.values":
[[1]]
[1] 0.9722


Slot "alpha.values":
list()