library(ggplot2)
library(gdata)
# NOTE: the working directory is hard-coded to an absolute path, so the
# relative workbook name below only resolves on a machine where this
# directory exists.
setwd("/Users/junwen/Documents/Machine Learning")

# Read the first worksheet, skipping the first 6 lines and treating the next
# one as the header row.
df <- read.xls(
  "STOXX50DVP_20110110-20161103.xlsx",
  sheet = 1,
  skip = 6,
  header = TRUE
)

# Replace the imported column names with short, fixed ones.
names(df) <- c("date", "price")

# Print min / quartiles / median / mean / max of the price column.
summary(df[["price"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.62 12.27 90.56 66.45 102.80 124.30
# Normalise the date column to a plain character vector. as.character() is
# already vectorised; wrapping it in lapply() would turn the column into a
# list, which most vectorised operations do not expect.
df$date <- as.character(df$date)

# Derive the year label from the leading four digits of each date string.
# Rows whose date does not start with four digits get NA rather than being
# silently counted under a default year.
df$year <- ifelse(
  grepl("^[0-9]{4}", df$date),
  substr(df$date, 1, 4),
  NA_character_
)
# Overlay each year's price series on a single set of axes, one colour per
# year. The x axis is the within-year observation index.
year_cols <- c(
  "2011" = "red",
  "2012" = "yellow",
  "2013" = "green",
  "2014" = "blue",
  "2015" = "brown",
  "2016" = "darkviolet"
)

# Split the prices by year once so the axis limits and the lines share the
# same data.
price_by_year <- lapply(names(year_cols), function(yr) {
  df[which(df$year == yr), "price"]
})
names(price_by_year) <- names(year_cols)

# Size the axes from every series, not just the first one drawn; otherwise a
# longer or higher/lower series in a later year is clipped.
max_len <- max(lengths(price_by_year), 1L)
plot(
  NULL,
  type = "n",
  xlim = c(1, max_len),
  ylim = range(df$price, na.rm = TRUE),
  xlab = "Index",
  ylab = "price"
)
for (yr in names(year_cols)) {
  lines(price_by_year[[yr]], col = year_cols[[yr]])
}

# Without a legend the six colours cannot be mapped back to years.
legend(
  "topright",
  legend = names(year_cols),
  col = year_cols,
  lty = 1,
  bty = "n"
)

# Box plot of the price distribution for each year. qplot() is deprecated as
# of ggplot2 3.4.0, so the plot is built with ggplot() + geom_boxplot().
ggplot(df, aes(x = year, y = price)) +
  geom_boxplot()

# Mean price within each year group.
by(df$price, df$year, FUN = mean)
## df$year: 2011
## [1] 70.26278
## --------------------------------------------------------
## df$year: 2012
## [1] 68.92988
## --------------------------------------------------------
## df$year: 2013
## [1] 63.12695
## --------------------------------------------------------
## df$year: 2014
## [1] 64.375
## --------------------------------------------------------
## df$year: 2015
## [1] 66.91778
## --------------------------------------------------------
## df$year: 2016
## [1] 64.91468
# Standard deviation of price within each year group.
by(df$price, df$year, FUN = sd)
## df$year: 2011
## [1] 47.5929
## --------------------------------------------------------
## df$year: 2012
## [1] 42.8368
## --------------------------------------------------------
## df$year: 2013
## [1] 39.63415
## --------------------------------------------------------
## df$year: 2014
## [1] 42.04822
## --------------------------------------------------------
## df$year: 2015
## [1] 43.99815
## --------------------------------------------------------
## df$year: 2016
## [1] 43.55805