Stock data from https://www.investing.com/. In the website, search company stock name and look for historical data (as shown below)
---
In this program, I can use daily or monthly record, however as an amateur investor like me, buying stock monthly is reasonable. Therefore, sometimes I calculate numbers and ratios in monthly records.
The input data is originally downloaded from https://www.investing.com/, my code is adapted to data format from the website.
#Library for data analysis
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
fdata <- read_csv(file="data_FPT_2010_2024_daily.csv")
## Rows: 3521 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Date, Vol., Change %
## num (4): Price, Open, High, Low
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fdata <- janitor::clean_names(fdata)
#---change date column from chr to date types
fdata$date <- mdy(fdata$date)
fdata <- fdata[order(fdata$date),]
#---convert volume into number. e.g 1.84M => 1.84*10^6, 100K => 100*10^3
#step 1: separate the number and word
fdata <- fdata %>%
separate(vol,
into = c("vol_value","vol_unit"),
sep = "(?<=[0-9])(?=[A-Z])") %>%
mutate(vol_value = as.numeric(vol_value))
#step 2: convert word into corresponding values, 1M = 10^6, 1K = 10^3
for (i in 1:length(fdata$vol_value)){
if (fdata$vol_unit[i] == "M") {
fdata$vol_value[i] <- fdata$vol_value[i]*10^6
} else if (fdata$vol_unit[i] == "K"){
fdata$vol_value[i] <- fdata$vol_value[i]*10^3
}
}
#step 3: remove temperatory column
fdata <- fdata %>% subset(select = -c(vol_unit))
#---convert change_percent into number. e.g -0.27% => -0.0027
fdata <- fdata %>%
separate(change_percent,
into = c("change","percent"),
sep = "(?<=[0-9])(?=[%])") %>%
mutate(change = as.numeric(change)/100) %>%
subset(select = -c(percent))
#---remove outlier: I take change from 1st to 99th quantile, the rest are abnormal change => problematic statistics
quartiles <- quantile(fdata$change, probs = c(.01,.99), na.rm = FALSE)
lower <- quartiles[1]
upper <- quartiles[2]
fdata <- subset(fdata, fdata$change>lower & fdata$change<upper)
fdata
## # A tibble: 3,449 × 7
## date price open high low vol_value change
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-04 8190. 8041. 8190. 7842. 1500000 0.0443
## 2 2010-01-05 8190. 8487. 8487. 8190. 1270000 0
## 3 2010-01-06 8041. 8190. 8289. 7892. 603460 -0.0182
## 4 2010-01-07 7991 8041. 8090. 7941. 653390 -0.0062
## 5 2010-01-08 7743. 8140. 8140. 7743. 1030000 -0.0311
## 6 2010-01-11 7693. 7892. 7892. 7594. 805230 -0.0064
## 7 2010-01-12 7544. 7644. 7743. 7544. 1440000 -0.0194
## 8 2010-01-13 7792. 7544. 7792. 7445 930160 0.0329
## 9 2010-01-14 7842. 7941. 7941. 7743. 790150 0.0064
## 10 2010-01-15 7693. 7842. 7842. 7693. 520200 -0.019
## # ℹ 3,439 more rows
#---plot price data---#
ggplot(fdata,aes(x=date)) +
geom_line( aes(y=price) ) +
labs(title="Daily Price of Stock",x="Timeline",y="Price (VND)") +
scale_x_date(date_labels="20%y",breaks="1 year")
#---plot volume data---#
ggplot(fdata,aes(x=date)) +
geom_line( aes(y=vol_value), stat = "identity" ) +
labs(title="Daily Volume of Stock",x="Timeline",y="Volume") +
scale_x_date(date_labels="20%y",breaks="1 year")
ggplot(fdata,aes(x=vol_value)) +
geom_histogram(binwidth = 50000) +
labs(title="Daily Volume of Stock",x="Volume",y="Frequency")
summary(fdata$vol_value)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18970 844200 1460000 1884165 2430000 12100000
How much return is the annual growth if I buy the stock daily?
num_of_year <- as.numeric((fdata$date[length(fdata$date)]-fdata$date[1])/365) #number of year from the first day
m <- mean(fdata$price) #mean
c <- fdata[[length(fdata$price),2]] #current price
stock_return <- (c/m-1)*100 #total growth in percentage
stock_return
## [1] 251.3312
The total growth in 14.13 year of investment is 251.33%.
If instead of buying stock, I save in bank daily (if the data set is monthly, it’s gonna be “I save in the bank monthly”). How much return do I get?
But I have a bigger question, if buying stock is like saving in the bank, how much bank interest is it if I buy the stock monthly?
I will create a look-up table, in which I will know the equivalent bank interest if I buy the stock monthly.
interest_rate_seq <- seq(0.00,0.40,0.01) #bank interest from 0% to 40%
year_from_now <- as.numeric((fdata[[length(fdata$date),1]]-fdata$date)/365)
bank_return <- data.frame(interest_rate_seq)
for (i in 1:length(interest_rate_seq)){
price_now <- fdata$price*((1+interest_rate_seq[i])^year_from_now)
m <- mean(fdata$price)
c <- mean(price_now)
bank_return[i,1] <- (c/m-1)*100 #in percentage
}
lookup_table <- as_tibble(data.frame(interest_rate_seq*100,bank_return))
colnames(lookup_table) <- c("annual_interst","total_growth")
lookup_table
## # A tibble: 41 × 2
## annual_interst total_growth
## <dbl> <dbl>
## 1 0 0
## 2 1 3.90
## 3 2 8.04
## 4 3 12.4
## 5 4 17.1
## 6 5 22.1
## 7 6 27.4
## 8 7 33.1
## 9 8 39.1
## 10 9 45.6
## # ℹ 31 more rows
Use Look up table to find equivalent return
#---Look up for estimated annual growth---#
equivalent_return <- 0 #estimated annual return
for (i in 1:length(interest_rate_seq)){
if(stock_return > lookup_table$total_growth[i] &&
stock_return < lookup_table$total_growth[i+1])
{
equivalent_return <- lookup_table$annual_interst[i]+
(stock_return-lookup_table$total_growth[i])/
(lookup_table$total_growth[i+1]-lookup_table$total_growth[i])/100
}
}
equivalent_return
## [1] 25.00508
If I use those money to saving in the bank instead of buying stock, the bank interest must be equivalent to 25.01 %
Volatility measure the degree of variation of a trading price series overtime.
Why does volatility matter? …
Look at daily change in percentage
ggplot(data = fdata, aes(x=change*100))+
geom_histogram(binwidth = 0.1)+
labs(x ="Change in %", title="Distribution of change in percentage")
summary(fdata$change*100)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.60000 -0.71000 0.00000 0.09277 0.86000 4.84000
sd(fdata$change*100)
## [1] 1.435804
Calculate volatility
volatility <- sd(fdata$change)*sqrt(252) #from daily votality to annual votality
volatility
## [1] 0.2279269
Volatility is 0.2279269
Import market price - ETF VN100
vn100 <- read_csv(file="data_VN100_2010_2024_daily.csv")
## Rows: 2318 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Date, Vol., Change %
## num (4): Price, Open, High, Low
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
vn100 <- janitor::clean_names(vn100)
#change date column from chr to date types
vn100$date <- mdy(vn100$date)
vn100 <- vn100[order(vn100$date),]
#convert change_percent into number. e.g -0.27% => -0.0027
vn100 <- vn100 %>%
separate(change_percent,
into = c("change","percent"),
sep = "(?<=[0-9])(?=[%])") %>%
mutate(change = as.numeric(change)/100) %>%
subset(select = -c(percent))
vn100
## # A tibble: 2,318 × 7
## date price open high low vol change
## <date> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 2014-11-05 580. 580. 584. 576. 82.65K -0.0027
## 2 2014-11-06 580. 580. 583. 579. 50.59K 0.0007
## 3 2014-11-07 585. 581. 585. 580. 80.05K 0.0081
## 4 2014-11-10 586. 586. 589. 586. 86.06K 0.0014
## 5 2014-11-11 585. 586. 589. 584. 92.47K -0.0016
## 6 2014-11-12 585. 586. 587. 584. 73.92K 0.0005
## 7 2014-11-13 583. 586. 587. 583. 89.93K -0.0039
## 8 2014-11-14 581. 582. 582. 576. 94.94K -0.0022
## 9 2014-11-17 581. 583. 584. 581. 61.30K -0.001
## 10 2014-11-18 573. 581. 581. 573. 78.63K -0.0131
## # ℹ 2,308 more rows
market_vs_stock <- vn100 %>%
full_join(fdata,by="date") %>%
select("date","price.x","price.y","change.x","change.y") %>%
na.omit()
colnames(market_vs_stock) = c("date","market_score","stock_price","market_return","stock_return")
market_vs_stock
## # A tibble: 2,272 × 5
## date market_score stock_price market_return stock_return
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2014-11-05 580. 12623. -0.0027 -0.0098
## 2 2014-11-06 580. 12498. 0.0007 -0.0099
## 3 2014-11-07 585. 12623. 0.0081 0.01
## 4 2014-11-10 586. 12623. 0.0014 0
## 5 2014-11-11 585. 12748. -0.0016 0.0099
## 6 2014-11-12 585. 12623. 0.0005 -0.0098
## 7 2014-11-13 583. 12623. -0.0039 0
## 8 2014-11-14 581. 12498. -0.0022 -0.0099
## 9 2014-11-17 581. 12473. -0.001 -0.002
## 10 2014-11-18 573. 12273. -0.0131 -0.016
## # ℹ 2,262 more rows
Visualize the change in market and stock value changes on the same scale. The y-axis value is the score according to market score.
I convert stock price into stock score with the same beginning point with market score
market_vs_stock_visual <- market_vs_stock %>%
mutate(stock_score = stock_price*market_vs_stock$market_score[1]/market_vs_stock$stock_price[1]) %>%
pivot_longer(cols = c("market_score","stock_score"),
names_to = "type",
values_to = "score")
market_vs_stock_visual
## # A tibble: 4,544 × 6
## date stock_price market_return stock_return type score
## <date> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 2014-11-05 12623. -0.0027 -0.0098 market_score 580.
## 2 2014-11-05 12623. -0.0027 -0.0098 stock_score 580.
## 3 2014-11-06 12498. 0.0007 -0.0099 market_score 580.
## 4 2014-11-06 12498. 0.0007 -0.0099 stock_score 574.
## 5 2014-11-07 12623. 0.0081 0.01 market_score 585.
## 6 2014-11-07 12623. 0.0081 0.01 stock_score 580.
## 7 2014-11-10 12623. 0.0014 0 market_score 586.
## 8 2014-11-10 12623. 0.0014 0 stock_score 580.
## 9 2014-11-11 12748. -0.0016 0.0099 market_score 585.
## 10 2014-11-11 12748. -0.0016 0.0099 stock_score 585.
## # ℹ 4,534 more rows
ggplot(market_vs_stock_visual,aes(x=date, y=score, color=type)) +
geom_line()+
labs(title="Monthly Price of Stock",
x="Time",
y="Pseudo Price")+
scale_x_date(date_labels="20%y",breaks="1 year")+
scale_color_manual(values = c("grey50","red"))
Calculate Beta
beta <- cov(market_vs_stock$market_return,market_vs_stock$stock_return)/var(market_vs_stock$market_return)
beta
## [1] 0.8120956
read more at: https://www.financestrategists.com/wealth-management/fundamental-vs-technical-analysis/beta/