A stock market is where buyers and sellers trade shares of a company, and is one of the most popular ways for individuals and companies to invest money. The size of the world stock market is now estimated to be in the trillions. The largest stock market in the world is the New York Stock Exchange (NYSE), located in New York City. About 2,800 companies are listed on the NSYE. In this problem, we'll look at the monthly stock prices of five of these companies: IBM, General Electric (GE), Procter and Gamble, Coca Cola, and Boeing. The data used in this problem comes from Infochimps.
Date: the date of the stock price, always given as the first of the month.
StockPrice: the average stock price of the company in the given month.
Before working with these data sets, we need to convert the dates into a format that R can understand. Take a look at the structure of one of the datasets using the str function. Right now, the date variable is stored as a factor. Convert this to a “Date” object.
# Set the directory at where the data is located
setwd("/home/tarek/Analytics/Week1/Rlectures/Data")
# Read the Data
IBM <- read.csv("IBMStock.csv")
GE <- read.csv("GEStock.csv")
ProcterGamble <- read.csv("ProcterGambleStock.csv")
CocaCola <- read.csv("CocaColaStock.csv")
Boeing <- read.csv("BoeingStock.csv")
# Convert date factor into a date object
IBM$Date = as.Date(IBM$Date, "%m/%d/%y")
GE$Date = as.Date(GE$Date, "%m/%d/%y")
CocaCola$Date = as.Date(CocaCola$Date, "%m/%d/%y")
ProcterGamble$Date = as.Date(ProcterGamble$Date, "%m/%d/%y")
Boeing$Date = as.Date(Boeing$Date, "%m/%d/%y")
# Structure of IBM Stock data
str(IBM)
## 'data.frame': 480 obs. of 2 variables:
## $ Date : Date, format: "1970-01-01" "1970-02-01" ...
## $ StockPrice: num 360 347 327 320 270 ...
max(IBM$Date)
## [1] "2009-12-01"
summary(IBM)
## Date StockPrice
## Min. :1970-01-01 Min. : 43.4
## 1st Qu.:1979-12-24 1st Qu.: 88.3
## Median :1989-12-16 Median :112.1
## Mean :1989-12-15 Mean :144.4
## 3rd Qu.:1999-12-08 3rd Qu.:165.4
## Max. :2009-12-01 Max. :438.9
summary(GE)
## Date StockPrice
## Min. :1970-01-01 Min. : 9.29
## 1st Qu.:1979-12-24 1st Qu.: 44.21
## Median :1989-12-16 Median : 55.81
## Mean :1989-12-15 Mean : 59.30
## 3rd Qu.:1999-12-08 3rd Qu.: 72.23
## Max. :2009-12-01 Max. :156.84
summary(CocaCola)
## Date StockPrice
## Min. :1970-01-01 Min. : 30.1
## 1st Qu.:1979-12-24 1st Qu.: 42.8
## Median :1989-12-16 Median : 51.4
## Mean :1989-12-15 Mean : 60.0
## 3rd Qu.:1999-12-08 3rd Qu.: 69.6
## Max. :2009-12-01 Max. :146.6
summary(Boeing)
## Date StockPrice
## Min. :1970-01-01 Min. : 12.7
## 1st Qu.:1979-12-24 1st Qu.: 34.6
## Median :1989-12-16 Median : 44.9
## Mean :1989-12-15 Mean : 46.6
## 3rd Qu.:1999-12-08 3rd Qu.: 57.2
## Max. :2009-12-01 Max. :107.3
summary(ProcterGamble)
## Date StockPrice
## Min. :1970-01-01 Min. : 46.9
## 1st Qu.:1979-12-24 1st Qu.: 62.5
## Median :1989-12-16 Median : 78.3
## Mean :1989-12-15 Mean : 77.7
## 3rd Qu.:1999-12-08 3rd Qu.: 89.5
## Max. :2009-12-01 Max. :149.6
plot(CocaCola$Date, CocaCola$StockPrice, type = "l", col = "red")
lines(ProcterGamble$Date, ProcterGamble$StockPrice, col = "blue")
abline(v = as.Date(c("2000-03-01")), lwd = 2)
# Visualizing stock dynamics from 1995 to 2005
plot(CocaCola$Date[301:432], CocaCola$StockPrice[301:432], type = "l", col = "red",
ylim = c(0, 210), xlab = "Year", ylab = "Stock Price", lwd = 2, main = "Stock Price from 1995 to 2005")
lines(ProcterGamble$Date[301:432], ProcterGamble$StockPrice[301:432], col = "blue",
lwd = 2)
lines(IBM$Date[301:432], IBM$StockPrice[301:432], col = "green", lwd = 2)
lines(GE$Date[301:432], GE$StockPrice[301:432], col = "purple", lwd = 2)
lines(Boeing$Date[301:432], Boeing$StockPrice[301:432], col = "orange", lwd = 2)
legend("topright", c("Coca Cola", "Procter Gamble", "IBM", "GE", "Boeing"),
lty = c(1, 1), lwd = c(2, 2, 2, 2, 2), col = c("red", "blue", "green", "purple",
"orange"))
# Companies with a decreasing trend in their stock price from September
# 1997 to November 1997. Create vertical lines at September 1997 and
# November 1997 abline(v=as.Date(c('1997-09-01')), lwd=2)
# abline(v=as.Date(c('1997-11-01')), lwd=2)
# let's see if stocks tend to be higher or lower during certain months.
tapply(IBM$StockPrice, months(IBM$Date), mean) > mean(IBM$StockPrice)
## April August December February January July June
## TRUE FALSE FALSE TRUE TRUE FALSE FALSE
## March May November October September
## TRUE TRUE FALSE FALSE FALSE
tapply(GE$StockPrice, months(GE$Date), mean)
## April August December February January July June
## 64.48 56.50 59.10 62.52 62.05 56.73 56.47
## March May November October September
## 63.15 60.87 57.29 56.24 56.24
tapply(CocaCola$StockPrice, months(CocaCola$Date), mean)
## April August December February January July June
## 62.69 58.88 59.73 60.73 60.37 58.98 60.81
## March May November October September
## 62.07 61.44 59.10 57.94 57.60
tapply(Boeing$StockPrice, months(Boeing$Date), mean)
## April August December February January July June
## 47.05 46.86 46.17 46.89 46.51 46.55 47.39
## March May November October September
## 46.88 48.14 45.15 45.22 46.30
tapply(ProcterGamble$StockPrice, months(ProcterGamble$Date), mean)
## April August December February January July June
## 77.69 76.82 78.30 79.03 79.62 76.65 77.39
## March May November October September
## 77.35 77.86 78.46 76.68 76.62