Yahoo! Finance provides financial data, including stock quotes. Historical data for individual companies and indices can be downloaded from its website as an Excel file, but R also offers tools (e.g. the tidyquant and BatchGetSymbols R packages) that download the data directly, provided you know the corresponding stock ticker symbol. The best way to download data from Yahoo! Finance depends on your specific needs.
The following R code shows how to download share-price data for Berkshire Hathaway Inc. (ticker BRK-B) over a specific period of time, and how to produce some basic summaries.
# NOTE: the rm(list = ls()) call that used to open this section was removed.
# It deletes every object in the caller's workspace but leaves attached
# packages and options untouched, so it does not give a clean session.
# Restart R instead if a fresh session is needed.
#########################################################################################
# Method 1
#########################################################################################
# Required packages
library(tidyquant)
library(ggplot2)
library(dplyr)
# Download the share prices for Berkshire Hathaway (BRK-B)
# from = "2010-01-01", to = "2020-01-01"
# auto.assign = FALSE makes getSymbols() return the series instead of creating
# a global named `BRK-B`, whose non-syntactic name needs backticks at every use.
brk_b <- getSymbols("BRK-B",
                    from = "2010-01-01",
                    to = "2020-01-01",
                    warnings = FALSE,
                    auto.assign = FALSE)
# An excerpt of the data
head(brk_b)
## BRK-B.Open BRK-B.High BRK-B.Low BRK-B.Close BRK-B.Volume
## 2010-01-04 66.00 66.50 65.92 66.22 1575000
## 2010-01-05 66.39 66.59 66.15 66.54 1310000
## 2010-01-06 66.50 66.50 66.20 66.20 1760000
## 2010-01-07 66.20 66.51 66.14 66.46 1505000
## 2010-01-08 66.48 66.48 66.30 66.44 1500000
## 2010-01-11 66.64 66.87 66.16 66.50 2620000
## BRK-B.Adjusted
## 2010-01-04 66.22
## 2010-01-05 66.54
## 2010-01-06 66.20
## 2010-01-07 66.46
## 2010-01-08 66.44
## 2010-01-11 66.50
# Transforming the data into a data frame
# check.names = FALSE keeps the "BRK-B.*" column names as they are; by default
# data.frame() would rewrite them into syntactic names (e.g. "BRK.B.Open").
dat <- data.frame(brk_b, check.names = FALSE)
# Pull out the adjusted closing price as a plain numeric vector
data1 <- dat[, "BRK-B.Adjusted", drop = TRUE]
# Density-scaled histogram (about 50 bins requested) with enlarged axis text
hist(
  data1,
  probability = TRUE,
  breaks = 50,
  xlab = "BRK-B.Adjusted",
  cex.lab = 1.5,
  cex.axis = 1.5
)
# Draw a frame around the plot region
box()
# Quartiles, extremes and mean of the adjusted price
summary(data1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 64.94 84.67 134.12 135.09 171.81 227.05
# Time series plot
# Pair each trading day (taken from the row names of `dat`) with its
# adjusted price
ts <- data.frame(
  day = as.Date(row.names(dat)),
  Price = dat[, "BRK-B.Adjusted", drop = TRUE]
)
# Line chart with larger text overall and smaller x-axis tick labels
p <- ggplot(data = ts, mapping = aes(x = day, y = Price)) +
  geom_line() +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(size = 10)
  )
# Display the chart with x-axis labels formatted as year, abbreviated month, day
p + scale_x_date(date_labels = "%Y %b %d")
# Calculating the 1-day log returns
# r_t = log(P_t / P_{t-1}) = log(P_t) - log(P_{t-1}), i.e. the first
# difference of the log prices.
# Sample size (number of daily prices)
n <- length(data1)
# One return needs two consecutive prices; fail early with a clear message
# when the price vector is too short.
if (n < 2) {
  stop("At least two prices are needed to compute log returns.", call. = FALSE)
}
# Log returns: diff() of the log prices gives the n - 1 one-day returns
LR <- diff(log(data1))
# Density-scaled histogram of the returns with enlarged axis text
hist(LR, breaks = 50, xlab = "Log-Returns", probability = TRUE,
     cex.axis = 1.5, cex.lab = 1.5, main = "1-Day Log-returns")
box()
# NOTE: the rm(list = ls()) call that used to open this section was removed.
# It deletes every object in the caller's workspace but leaves attached
# packages and options untouched, so it does not give a clean session.
# Every object this section uses (BRK, dat, data1, ts, p, n, LR) is assigned
# afresh below, so no reset is needed for the code to run correctly.
#########################################################################################
# Method 2
#########################################################################################
# Required packages
library(BatchGetSymbols)
library(ggplot2)
library(dplyr)
# Download the share prices for Berkshire Hathaway (BRK-B)
# from = "2010-01-01", to = "2020-01-01"
# The download cache is kept under the session's temporary directory.
BRK <- BatchGetSymbols(
  tickers = "BRK-B",
  first.date = "2010-01-01",
  last.date = "2020-01-01",
  freq.data = "daily",
  cache.folder = file.path(tempdir(), "BRK-B")
)
# Transforming the data into a data frame
# check.names = FALSE keeps the column names of BRK$df.tickers exactly as
# returned, so they do not have to be restored in a second step.
dat <- data.frame(BRK$df.tickers, check.names = FALSE)
# An excerpt of the data
head(dat)
## price.open price.high price.low price.close volume price.adjusted ref.date
## 1 66.00 66.50 65.92 66.22 1575000 66.22 2010-01-04
## 2 66.39 66.59 66.15 66.54 1310000 66.54 2010-01-05
## 3 66.50 66.50 66.20 66.20 1760000 66.20 2010-01-06
## 4 66.20 66.51 66.14 66.46 1505000 66.46 2010-01-07
## 5 66.48 66.48 66.30 66.44 1500000 66.44 2010-01-08
## 6 66.64 66.87 66.16 66.50 2620000 66.50 2010-01-11
## ticker ret.adjusted.prices ret.closing.prices
## 1 BRK-B NA NA
## 2 BRK-B 0.0048323769 0.0048323769
## 3 BRK-B -0.0051097685 -0.0051097685
## 4 BRK-B 0.0039275228 0.0039275228
## 5 BRK-B -0.0003008878 -0.0003008878
## 6 BRK-B 0.0009030403 0.0009030403
# Pull out the adjusted closing price as a plain numeric vector
data1 <- dat[, "price.adjusted", drop = TRUE]
# Density-scaled histogram (about 50 bins requested) with enlarged axis text
hist(
  data1,
  probability = TRUE,
  breaks = 50,
  xlab = "price.adjusted",
  cex.lab = 1.5,
  cex.axis = 1.5
)
# Draw a frame around the plot region
box()
# Quartiles, extremes and mean of the adjusted price
summary(data1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 64.94 84.67 134.12 135.09 171.81 227.05
# Time series plot
# Pair each reference date with its adjusted price
ts <- data.frame(
  day = dat[["ref.date"]],
  Price = dat[, "price.adjusted", drop = TRUE]
)
# Line chart with larger text overall and smaller x-axis tick labels
p <- ggplot(data = ts, mapping = aes(x = day, y = Price)) +
  geom_line() +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(size = 10)
  )
# Display the chart with x-axis labels formatted as year, abbreviated month, day
p + scale_x_date(date_labels = "%Y %b %d")
# Calculating the 1-day log returns
# r_t = log(P_t / P_{t-1}) = log(P_t) - log(P_{t-1}), i.e. the first
# difference of the log prices.
# Sample size (number of daily prices)
n <- length(data1)
# One return needs two consecutive prices; fail early with a clear message
# when the price vector is too short.
if (n < 2) {
  stop("At least two prices are needed to compute log returns.", call. = FALSE)
}
# Log returns: diff() of the log prices gives the n - 1 one-day returns
LR <- diff(log(data1))
# Density-scaled histogram of the returns with enlarged axis text
hist(LR, breaks = 50, xlab = "Log-Returns", probability = TRUE,
     cex.axis = 1.5, cex.lab = 1.5, main = "1-Day Log-returns")
box()