Yahoo! Finance data

Yahoo! Finance provides financial data, including stock quotes. Historical data for different companies and indexes can be downloaded from its website in an Excel file, but R packages (e.g. tidyquant and BatchGetSymbols) can also download the data directly, provided you know the corresponding stock ticker symbol. The best way to download data from Yahoo! Finance depends on your specific needs.

The following R code shows how to download data for Berkshire Hathaway Inc. (ticker symbol BRK-B) over a specific period of time, and how to produce some basic summaries.

R code

Method 1

#########################################################################################
# Method 1
#########################################################################################
# Run this script in a fresh R session. rm(list = ls()) is deliberately not
# used: it deletes the caller's own objects, yet it neither detaches packages
# nor resets options, so it does not give a clean slate anyway.
# Required packages
library(tidyquant)
library(ggplot2)
library(dplyr)

# Download the share prices for Berkshire Hathaway (BRK-B)
# from = "2010-01-01", to = "2020-01-01".
# auto.assign = FALSE makes getSymbols() return the series instead of creating
# a global variable with the non-syntactic name `BRK-B`, so the result can be
# bound to an ordinary name.
brk_prices <- getSymbols(
  "BRK-B",
  from = "2010-01-01",
  to = "2020-01-01",
  warnings = FALSE,
  auto.assign = FALSE
)

# An excerpt of the data
head(brk_prices)
##            BRK-B.Open BRK-B.High BRK-B.Low BRK-B.Close BRK-B.Volume
## 2010-01-04      66.00      66.50     65.92       66.22      1575000
## 2010-01-05      66.39      66.59     66.15       66.54      1310000
## 2010-01-06      66.50      66.50     66.20       66.20      1760000
## 2010-01-07      66.20      66.51     66.14       66.46      1505000
## 2010-01-08      66.48      66.48     66.30       66.44      1500000
## 2010-01-11      66.64      66.87     66.16       66.50      2620000
##            BRK-B.Adjusted
## 2010-01-04          66.22
## 2010-01-05          66.54
## 2010-01-06          66.20
## 2010-01-07          66.46
## 2010-01-08          66.44
## 2010-01-11          66.50
# Transforming the data into a data frame.
# check.names = FALSE keeps the original "BRK-B.*" column names; without it
# data.frame() would rewrite them into syntactic names ("BRK.B.*").
dat <- data.frame(brk_prices, check.names = FALSE)

# Let's focus on the Adjusted close price
data1 <- dat[["BRK-B.Adjusted"]]

# Density-scaled histogram of the adjusted close price, framed with a box
hist(
  x = data1,
  breaks = 50,
  probability = TRUE,
  xlab = "BRK-B.Adjusted",
  cex.lab = 1.5,
  cex.axis = 1.5
)
box()

# Minimum, quartiles, mean and maximum of the adjusted close price
summary(data1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   64.94   84.67  134.12  135.09  171.81  227.05
# Time series plot
# The plotting data frame is not called `ts`, so it does not reuse the name
# of the stats::ts() function.
price_series <- data.frame(
  day = as.Date(rownames(dat)),
  Price = dat[["BRK-B.Adjusted"]]
)

p <- ggplot(price_series, aes(x = day, y = Price)) +
  geom_line() +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(size = 10)
  )

# Print explicitly: a ggplot object is only drawn when it is printed, and
# top-level auto-printing does not happen when the script is run via source().
print(p + scale_x_date(date_labels = "%Y %b %d"))

# Calculating the 1-day log returns

# Sample size
n <- length(data1)

# At least two prices are needed to form one return; stop with a clear
# message rather than letting hist() fail on an empty vector.
if (n < 2) {
  stop("Need at least two prices to compute log returns.", call. = FALSE)
}

# Log returns: log(P[t] / P[t-1]) = log(P[t]) - log(P[t-1])
LR <- diff(log(data1))

hist(LR, breaks = 50, xlab = "Log-Returns", probability = TRUE,
     cex.axis = 1.5, cex.lab = 1.5, main = "1-Day Log-returns")
box()

Method 2

#########################################################################################
# Method 2
#########################################################################################
# Run this script in a fresh R session. rm(list = ls()) is deliberately not
# used: it deletes the caller's own objects, yet it neither detaches packages
# nor resets options, so it does not give a clean slate anyway.
# Required packages
library(BatchGetSymbols)
library(ggplot2)
library(dplyr)

# Download the share prices for Berkshire Hathaway (BRK-B)
# from = "2010-01-01", to = "2020-01-01"
BRK <- BatchGetSymbols(
  tickers = "BRK-B",
  first.date = "2010-01-01",
  last.date = "2020-01-01",
  freq.data = "daily",
  cache.folder = file.path(tempdir(), "BRK-B")
)

# Fail early with a clear message if no rows came back; otherwise the
# histogram and plot further down would fail with unrelated errors.
if (is.null(BRK$df.tickers) || nrow(BRK$df.tickers) == 0) {
  stop("No price data was returned for ticker 'BRK-B'.", call. = FALSE)
}

# Transforming the data into a plain data frame. The column names are
# already syntactic (see the excerpt below), so they survive the conversion
# unchanged and do not need to be restored afterwards.
dat <- as.data.frame(BRK$df.tickers)

# An excerpt of the data
head(dat)
##   price.open price.high price.low price.close  volume price.adjusted   ref.date
## 1      66.00      66.50     65.92       66.22 1575000          66.22 2010-01-04
## 2      66.39      66.59     66.15       66.54 1310000          66.54 2010-01-05
## 3      66.50      66.50     66.20       66.20 1760000          66.20 2010-01-06
## 4      66.20      66.51     66.14       66.46 1505000          66.46 2010-01-07
## 5      66.48      66.48     66.30       66.44 1500000          66.44 2010-01-08
## 6      66.64      66.87     66.16       66.50 2620000          66.50 2010-01-11
##   ticker ret.adjusted.prices ret.closing.prices
## 1  BRK-B                  NA                 NA
## 2  BRK-B        0.0048323769       0.0048323769
## 3  BRK-B       -0.0051097685      -0.0051097685
## 4  BRK-B        0.0039275228       0.0039275228
## 5  BRK-B       -0.0003008878      -0.0003008878
## 6  BRK-B        0.0009030403       0.0009030403
# Let's focus on the Adjusted close price
data1 <- dat[["price.adjusted"]]

# Density-scaled histogram of the adjusted close price, framed with a box
hist(
  x = data1,
  breaks = 50,
  probability = TRUE,
  xlab = "price.adjusted",
  cex.lab = 1.5,
  cex.axis = 1.5
)
box()

# Minimum, quartiles, mean and maximum of the adjusted close price
summary(data1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   64.94   84.67  134.12  135.09  171.81  227.05
# Time series plot
# The plotting data frame is not called `ts`, so it does not reuse the name
# of the stats::ts() function.
price_series <- data.frame(
  day = dat[["ref.date"]],
  Price = dat[["price.adjusted"]]
)

p <- ggplot(price_series, aes(x = day, y = Price)) +
  geom_line() +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(size = 10)
  )

# Print explicitly: a ggplot object is only drawn when it is printed, and
# top-level auto-printing does not happen when the script is run via source().
print(p + scale_x_date(date_labels = "%Y %b %d"))

# Calculating the 1-day log returns

# Sample size
n <- length(data1)

# At least two prices are needed to form one return; stop with a clear
# message rather than letting hist() fail on an empty vector.
if (n < 2) {
  stop("Need at least two prices to compute log returns.", call. = FALSE)
}

# Log returns: log(P[t] / P[t-1]) = log(P[t]) - log(P[t-1])
LR <- diff(log(data1))

hist(LR, breaks = 50, xlab = "Log-Returns", probability = TRUE,
     cex.axis = 1.5, cex.lab = 1.5, main = "1-Day Log-returns")
box()