STOCK PRICE ANALYSIS

I. Explore Data Analysis

Stock data from https://www.investing.com/. In the website, search company stock name and look for historical data (as shown below)

---

---
---

In this program, I can use daily or monthly record, however as an amateur investor like me, buying stock monthly is reasonable. Therefore, sometimes I calculate numbers and ratios in monthly records.

The input data is originally downloaded from https://www.investing.com/, my code is adapted to data format from the website.

#Library for data analysis
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(lubridate)
fdata <- read_csv(file="data_FPT_2010_2024_daily.csv")
## Rows: 3521 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Date, Vol., Change %
## num (4): Price, Open, High, Low
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fdata <- janitor::clean_names(fdata)

#---change date column from chr to date types
fdata$date <- mdy(fdata$date)
fdata <- fdata[order(fdata$date),]

#---convert volume into number. e.g 1.84M => 1.84*10^6, 100K => 100*10^3
#step 1: separate the number and word
fdata <- fdata %>% 
  separate(vol,
           into = c("vol_value","vol_unit"),
           sep = "(?<=[0-9])(?=[A-Z])") %>% 
  mutate(vol_value = as.numeric(vol_value))
#step 2: convert word into corresponding values, 1M = 10^6, 1K = 10^3
for (i in 1:length(fdata$vol_value)){
  if (fdata$vol_unit[i] == "M") {
    fdata$vol_value[i] <- fdata$vol_value[i]*10^6
  } else if (fdata$vol_unit[i] == "K"){
    fdata$vol_value[i] <- fdata$vol_value[i]*10^3
  }
}
#step 3: remove temperatory column 
fdata <- fdata %>% subset(select = -c(vol_unit))

#---convert change_percent into number. e.g -0.27% => -0.0027
fdata <- fdata %>% 
  separate(change_percent,
           into = c("change","percent"),
           sep = "(?<=[0-9])(?=[%])") %>%
  mutate(change = as.numeric(change)/100) %>% 
  subset(select = -c(percent))

#---remove outlier: I take change from 1st to 99th quantile, the rest are abnormal change => problematic statistics
quartiles <- quantile(fdata$change, probs = c(.01,.99), na.rm = FALSE)
lower <- quartiles[1]
upper <- quartiles[2]
fdata <- subset(fdata, fdata$change>lower & fdata$change<upper) 

fdata
## # A tibble: 3,449 × 7
##    date       price  open  high   low vol_value  change
##    <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>   <dbl>
##  1 2010-01-04 8190. 8041. 8190. 7842.   1500000  0.0443
##  2 2010-01-05 8190. 8487. 8487. 8190.   1270000  0     
##  3 2010-01-06 8041. 8190. 8289. 7892.    603460 -0.0182
##  4 2010-01-07 7991  8041. 8090. 7941.    653390 -0.0062
##  5 2010-01-08 7743. 8140. 8140. 7743.   1030000 -0.0311
##  6 2010-01-11 7693. 7892. 7892. 7594.    805230 -0.0064
##  7 2010-01-12 7544. 7644. 7743. 7544.   1440000 -0.0194
##  8 2010-01-13 7792. 7544. 7792. 7445     930160  0.0329
##  9 2010-01-14 7842. 7941. 7941. 7743.    790150  0.0064
## 10 2010-01-15 7693. 7842. 7842. 7693.    520200 -0.019 
## # ℹ 3,439 more rows
#---plot price data---#
ggplot(fdata,aes(x=date)) +
  geom_line( aes(y=price) ) +
    labs(title="Daily Price of Stock",x="Timeline",y="Price (VND)") +
     scale_x_date(date_labels="20%y",breaks="1 year")

#---plot volume data---#
ggplot(fdata,aes(x=date)) +
  geom_line( aes(y=vol_value), stat = "identity" ) +
    labs(title="Daily Volume of Stock",x="Timeline",y="Volume") +
     scale_x_date(date_labels="20%y",breaks="1 year")

ggplot(fdata,aes(x=vol_value)) +
  geom_histogram(binwidth = 50000) +
    labs(title="Daily Volume of Stock",x="Volume",y="Frequency")

summary(fdata$vol_value)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    18970   844200  1460000  1884165  2430000 12100000

II. Calculate ratio

1. Return if buy daily/monthly (depend on daily or month data)

How much return is the annual growth if I buy the stock daily?

num_of_year <- as.numeric((fdata$date[length(fdata$date)]-fdata$date[1])/365) #number of year from the first day

m <- mean(fdata$price) #mean
c <- fdata[[length(fdata$price),2]] #current price
stock_return <- (c/m-1)*100 #total growth in percentage
stock_return
## [1] 251.3312

The total growth in 14.13 year of investment is 251.33%.

If instead of buying stock, I save in bank daily (if the data set is monthly, it’s gonna be “I save in the bank monthly”). How much return do I get?

But I have a bigger question, if buying stock is like saving in the bank, how much bank interest is it if I buy the stock monthly?

I will create a look-up table, in which I will know the equivalent bank interest if I buy the stock monthly.

interest_rate_seq <- seq(0.00,0.40,0.01) #bank interest from 0% to 40%
year_from_now <- as.numeric((fdata[[length(fdata$date),1]]-fdata$date)/365) 
bank_return <- data.frame(interest_rate_seq)

for (i in 1:length(interest_rate_seq)){
  price_now <- fdata$price*((1+interest_rate_seq[i])^year_from_now)
  m <- mean(fdata$price)
  c <- mean(price_now)
  bank_return[i,1] <- (c/m-1)*100 #in percentage
}

lookup_table <- as_tibble(data.frame(interest_rate_seq*100,bank_return))
colnames(lookup_table) <- c("annual_interst","total_growth")
lookup_table
## # A tibble: 41 × 2
##    annual_interst total_growth
##             <dbl>        <dbl>
##  1              0         0   
##  2              1         3.90
##  3              2         8.04
##  4              3        12.4 
##  5              4        17.1 
##  6              5        22.1 
##  7              6        27.4 
##  8              7        33.1 
##  9              8        39.1 
## 10              9        45.6 
## # ℹ 31 more rows

Use Look up table to find equivalent return

#---Look up for estimated annual growth---#
equivalent_return <- 0 #estimated annual return

for (i in 1:length(interest_rate_seq)){
  if(stock_return > lookup_table$total_growth[i] && 
     stock_return < lookup_table$total_growth[i+1])
    {
    equivalent_return <- lookup_table$annual_interst[i]+
      (stock_return-lookup_table$total_growth[i])/
      (lookup_table$total_growth[i+1]-lookup_table$total_growth[i])/100
  }
}

equivalent_return
## [1] 25.00508

If I use those money to saving in the bank instead of buying stock, the bank interest must be equivalent to 25.01 %

2. Volatility

Volatility measure the degree of variation of a trading price series overtime.

Why does volatility matter? …

Look at daily change in percentage

ggplot(data = fdata, aes(x=change*100))+
  geom_histogram(binwidth = 0.1)+
  labs(x ="Change in %", title="Distribution of change in percentage")

summary(fdata$change*100)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -4.60000 -0.71000  0.00000  0.09277  0.86000  4.84000
sd(fdata$change*100)
## [1] 1.435804

Calculate volatility

volatility <- sd(fdata$change)*sqrt(252) #from daily votality to annual votality
volatility
## [1] 0.2279269

Volatility is 0.2279269

3. Beta

Import market price - ETF VN100

vn100 <- read_csv(file="data_VN100_2010_2024_daily.csv")
## Rows: 2318 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Date, Vol., Change %
## num (4): Price, Open, High, Low
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
vn100 <- janitor::clean_names(vn100)

#change date column from chr to date types
vn100$date <- mdy(vn100$date)
vn100 <- vn100[order(vn100$date),]

#convert change_percent into number. e.g -0.27% => -0.0027
vn100 <- vn100 %>% 
  separate(change_percent,
           into = c("change","percent"),
           sep = "(?<=[0-9])(?=[%])") %>%
  mutate(change = as.numeric(change)/100) %>% 
  subset(select = -c(percent))

vn100
## # A tibble: 2,318 × 7
##    date       price  open  high   low vol     change
##    <date>     <dbl> <dbl> <dbl> <dbl> <chr>    <dbl>
##  1 2014-11-05  580.  580.  584.  576. 82.65K -0.0027
##  2 2014-11-06  580.  580.  583.  579. 50.59K  0.0007
##  3 2014-11-07  585.  581.  585.  580. 80.05K  0.0081
##  4 2014-11-10  586.  586.  589.  586. 86.06K  0.0014
##  5 2014-11-11  585.  586.  589.  584. 92.47K -0.0016
##  6 2014-11-12  585.  586.  587.  584. 73.92K  0.0005
##  7 2014-11-13  583.  586.  587.  583. 89.93K -0.0039
##  8 2014-11-14  581.  582.  582.  576. 94.94K -0.0022
##  9 2014-11-17  581.  583.  584.  581. 61.30K -0.001 
## 10 2014-11-18  573.  581.  581.  573. 78.63K -0.0131
## # ℹ 2,308 more rows
market_vs_stock <- vn100 %>% 
  full_join(fdata,by="date") %>% 
  select("date","price.x","price.y","change.x","change.y") %>% 
  na.omit()

colnames(market_vs_stock) = c("date","market_score","stock_price","market_return","stock_return")

market_vs_stock
## # A tibble: 2,272 × 5
##    date       market_score stock_price market_return stock_return
##    <date>            <dbl>       <dbl>         <dbl>        <dbl>
##  1 2014-11-05         580.      12623.       -0.0027      -0.0098
##  2 2014-11-06         580.      12498.        0.0007      -0.0099
##  3 2014-11-07         585.      12623.        0.0081       0.01  
##  4 2014-11-10         586.      12623.        0.0014       0     
##  5 2014-11-11         585.      12748.       -0.0016       0.0099
##  6 2014-11-12         585.      12623.        0.0005      -0.0098
##  7 2014-11-13         583.      12623.       -0.0039       0     
##  8 2014-11-14         581.      12498.       -0.0022      -0.0099
##  9 2014-11-17         581.      12473.       -0.001       -0.002 
## 10 2014-11-18         573.      12273.       -0.0131      -0.016 
## # ℹ 2,262 more rows

Visualize the change in market and stock value changes on the same scale. The y-axis value is the score according to market score.

I convert stock price into stock score with the same beginning point with market score

market_vs_stock_visual <- market_vs_stock %>% 
  mutate(stock_score = stock_price*market_vs_stock$market_score[1]/market_vs_stock$stock_price[1]) %>% 
  pivot_longer(cols = c("market_score","stock_score"),
               names_to = "type",
               values_to = "score") 
market_vs_stock_visual
## # A tibble: 4,544 × 6
##    date       stock_price market_return stock_return type         score
##    <date>           <dbl>         <dbl>        <dbl> <chr>        <dbl>
##  1 2014-11-05      12623.       -0.0027      -0.0098 market_score  580.
##  2 2014-11-05      12623.       -0.0027      -0.0098 stock_score   580.
##  3 2014-11-06      12498.        0.0007      -0.0099 market_score  580.
##  4 2014-11-06      12498.        0.0007      -0.0099 stock_score   574.
##  5 2014-11-07      12623.        0.0081       0.01   market_score  585.
##  6 2014-11-07      12623.        0.0081       0.01   stock_score   580.
##  7 2014-11-10      12623.        0.0014       0      market_score  586.
##  8 2014-11-10      12623.        0.0014       0      stock_score   580.
##  9 2014-11-11      12748.       -0.0016       0.0099 market_score  585.
## 10 2014-11-11      12748.       -0.0016       0.0099 stock_score   585.
## # ℹ 4,534 more rows
ggplot(market_vs_stock_visual,aes(x=date, y=score, color=type)) +
  geom_line()+
  labs(title="Monthly Price of Stock",
       x="Time",
       y="Pseudo Price")+
  scale_x_date(date_labels="20%y",breaks="1 year")+
  scale_color_manual(values = c("grey50","red"))

Calculate Beta

beta <- cov(market_vs_stock$market_return,market_vs_stock$stock_return)/var(market_vs_stock$market_return)

beta
## [1] 0.8120956

read more at: https://www.financestrategists.com/wealth-management/fundamental-vs-technical-analysis/beta/

4. Sharpe ratio

Sharpe ratio

portfolio_return <- ((fdata$price[length(fdata$price)] - fdata$price[1])/fdata$price[1])^(1/num_of_year)
risk_free_rate <- 0.05

sharpe_ratio <- (portfolio_return-risk_free_rate)/volatility
sharpe_ratio
## [1] 5.006658

Sharpe ratio is 5.0066579