This is the first task pertaining to algorithmic trading. It involves trying to find patterns through visualization. The document is presented in the following manner:
library(lubridate)
library(dplyr)
library(tidyr)
library(xts)
library(quantmod)
library(PerformanceAnalytics)
library(RJSONIO)
library(TTR)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
# Load and clean the raw KSE OHLC export ----
# NOTE(review): setwd() with a machine-specific absolute path makes the script
# non-portable; it is kept because later code may rely on the working
# directory being changed here.
setwd("Z:/Ferguson 13 march/other assignment/DS/Stock Market App/Data/OHLC")
kse <- read.csv("ksedata(full).csv")

# Discard rows whose symbol field holds the literal text "symbol" and keep
# only the first seven columns.
kse <- kse[kse$symbol != "symbol", 1:7]

# Columns 3 to 7 are converted to numeric via character, so that a column
# read in as a factor would convert by its label rather than its level code.
kse[3:7] <- lapply(kse[3:7], function(column) as.numeric(as.character(column)))

# Dates are parsed as day-month-year.
kse$date <- dmy(kse$date)

# Remove exact duplicate rows first, then any later row that repeats the
# values of the first two columns.
kse <- unique(kse)
kse <- kse[!duplicated(kse[1:2]), ]
# Ticker symbols of the cement industry listings to analyse.
cement_tickers <- c(
  "ACPL", "BWCL", "CHCC", "DBCI", "DCL", "DGKC", "DNCC", "FCCL",
  "FECTC", "FLYNG", "GWLC", "JVDC", "JVDCPS", "KOHC", "LPCL", "LUCK",
  "MLCF", "PAKCEM", "PIOC", "PIOCR", "POWER", "SMCPL", "THCCL", "ZELP"
)

# Keep only the rows whose first column is one of the cement tickers.
kse2 <- subset(kse, kse[, 1] %in% cement_tickers, drop = TRUE)

# Keep columns 2, 1, 6 and 7, in that order (referred to further down as
# date, symbol, close and volume).
kse2 <- kse2[c(2, 1, 6, 7)]
# Closing price over time for every cement ticker: one facet row per symbol,
# with free axis scales.
ggplot(data = kse2, mapping = aes(x = date, y = close)) +
  geom_line(mapping = aes(colour = symbol, group = symbol)) +
  facet_grid(symbol ~ ., scales = "free")
# Volume thresholds and mean traded volume per symbol ----
# The 33rd, 67th and 100th percentiles of the row-level volumes are the
# cut-offs for the low / middle / high volume groups. na.rm = TRUE keeps the
# call from aborting when a volume is missing.
# NOTE(review): the cut-offs come from individual rows but are compared
# against per-symbol means further down -- confirm that this is intended.
quan <- quantile(kse2$volume, probs = c(0.33, 0.67, 1), na.rm = TRUE)

# Wide table of closing prices (one column per symbol), complete rows only.
kse3 <- spread(na.omit(kse2), symbol, close)

# Wide table of volumes (one column per symbol); the second column is dropped
# so that only the first column and the per-symbol volumes remain.
cement_volume <- spread(kse2, symbol, volume)
cement_volume <- cement_volume[, -2]

# Mean volume of every symbol column, ignoring missing values. vapply()
# guarantees a numeric vector whatever the input looks like.
cement_mean <- vapply(cement_volume[-1], mean, numeric(1), na.rm = TRUE)

# Two-column table: the mean volume and the symbol it belongs to.
cement_mean <- data.frame(
  mean = unname(cement_mean),
  symbol = names(cement_mean),
  stringsAsFactors = FALSE
)
# Low-volume group: symbols whose mean volume lies below the first threshold.
score_low <- cement_mean[cement_mean[["mean"]] < quan[1], ]
low_tickers <- unique(score_low[["symbol"]])

# Rows of the cement data belonging to those symbols.
kse_low_volume <- kse2[is.element(kse2$symbol, low_tickers), ]

# Closing price over time, one facet row per low-volume symbol.
ggplot(data = kse_low_volume, mapping = aes(x = date, y = close)) +
  geom_line(mapping = aes(colour = symbol, group = symbol)) +
  facet_grid(symbol ~ ., scales = "free")
# Middle-volume group: symbols whose mean volume is at or above the first
# threshold and below the second.
# Both sides of the condition compare the `mean` column; comparing the whole
# data frame would yield a logical matrix and, used as a row index, spurious
# all-NA rows. The half-open interval [quan[1], quan[2]) leaves no gap after
# the low group (mean < quan[1]) and no overlap with the high group
# (mean >= quan[2]).
score_middle <- cement_mean[cement_mean$mean >= quan[1] &
  cement_mean$mean < quan[2], ]
middle_tickers <- unique(score_middle$symbol)

# Rows of the cement data belonging to those symbols.
kse_middle_volume <- kse2[kse2$symbol %in% middle_tickers, ]

# Closing price over time, one facet row per middle-volume symbol.
ggplot(kse_middle_volume, aes(date, close)) +
  geom_line(aes(colour = symbol, group = symbol)) +
  facet_grid(symbol ~ ., scales = "free")
# High-volume group: symbols whose mean volume is at or above the second
# threshold (and no larger than the third).
# Both sides of the condition compare the `mean` column; comparing the whole
# data frame would yield a logical matrix and, used as a row index, spurious
# all-NA rows.
score_high <- cement_mean[cement_mean$mean >= quan[2] &
  cement_mean$mean <= quan[3], ]
high_tickers <- unique(score_high$symbol)

# Rows of the cement data belonging to those symbols.
kse_high_volume <- kse2[kse2$symbol %in% high_tickers, ]

# Closing price over time, one facet row per high-volume symbol.
ggplot(kse_high_volume, aes(date, close)) +
  geom_line(aes(colour = symbol, group = symbol)) +
  facet_grid(symbol ~ ., scales = "free")
library(PerformanceAnalytics)
compute_pairwise_correlations <- function(prices) {
  # Name:    compute_pairwise_correlations
  # Purpose: Turn a table of prices into log returns, draw the pairwise
  #          scatter-plot chart of those returns, and hand back their
  #          correlations.
  # Inputs:  prices = table of prices; its first column is left out of the
  #          calculation.
  # Output:  matrix of pairwise correlations between the return series.

  # Log return of one price series: difference of consecutive log prices.
  log_return <- function(series) diff(log(series))

  # Every column except the first is converted to log returns.
  returns <- apply(prices[, -1], MARGIN = 2, FUN = log_return)

  # Side effect: draw the pairwise relationship chart.
  chart.Correlation(returns, main = "Pairwise return scatter plot")

  # Each pair is correlated over the observations available for both series.
  cor(returns, use = "pairwise.complete.obs")
}
# kse_low_volume <- kse_low_volume[,c(-4)]
# kse_low_volume <- spread(kse_low_volume, symbol, close)
# kse_low_correlation <- compute_pairwise_correlations(kse_low_volume)
# Middle-volume group: drop the fourth column, widen to one close-price
# column per symbol, then chart and correlate the returns.
# Note that kse_middle_volume is overwritten with the wide price table.
kse_middle_volume <- spread(kse_middle_volume[, -4], symbol, close)
kse_middle_correlation <- compute_pairwise_correlations(kse_middle_volume)

# High-volume group: same steps; kse_high_volume is likewise overwritten.
kse_high_volume <- spread(kse_high_volume[, -4], symbol, close)
kse_high_correlation <- compute_pairwise_correlations(kse_high_volume)
# Return correlations across all symbols ----
# Columns 2, 1 and 6 of kse are widened to one close-price column per symbol.
kse_cor <- spread(kse[, c(2, 1, 6)], symbol, close)

# Log returns of every column except the first. drop = FALSE keeps the table
# shape even when only one symbol column is present.
returns <- apply(kse_cor[, -1, drop = FALSE], 2, function(x) diff(log(x)))
returns <- as.data.frame(returns)

# Names of the columns containing "KSE". Taking them from the name vector
# avoids the collapse to NULL that happens when exactly one column matches.
# NOTE(review): this pattern is unanchored while the filter below uses
# "^KSE" -- confirm which one is intended.
unwanted <- names(returns)[grepl("KSE", names(returns))]

# Drop the columns whose name starts with "KSE".
returns2 <- returns[, !grepl("^KSE", colnames(returns)), drop = FALSE]

# Keep only columns where fewer than 70% of the returns are missing.
kse_return <- returns2[
  ,
  vapply(returns2, function(x) sum(is.na(x)) / length(x) < 0.7, logical(1)),
  drop = FALSE
]

# Pairwise-complete correlations; the `use` value is spelled out instead of
# relying on partial matching of "p".
correlation_matrix <- cor(kse_return, use = "pairwise.complete.obs")
library(caret)
## Loading required package: lattice
# Highly correlated symbols ----
# Column indices that findCorrelation() flags at the 0.60 cut-off.
high_cor <- findCorrelation(correlation_matrix, cutoff = 0.60)

# Return series of the flagged columns. drop = FALSE keeps a data frame even
# when a single column is flagged.
high_data <- kse_return[, high_cor, drop = FALSE]

# Correlations over the rows where every flagged series is observed.
correlation_matrix2 <- cor(high_data, use = "complete.obs")

# high_data already holds returns, so it is charted and correlated directly.
# compute_pairwise_correlations() expects prices: it drops the first column
# and log-differences the rest, which is wrong for a correlation matrix.
# NOTE(review): this assumes the intent was the pairwise chart and
# correlations of the flagged return series -- confirm.
chart.Correlation(high_data, main = "Pairwise return scatter plot")
all_high_correlation <- cor(high_data, use = "pairwise.complete.obs")