This is the first task pertaining to algo trading. It involves trying to find patterns through visualization. The document is organized as follows:

For an industry-wise analysis, I have selected the cement industry as the prototype. Going forward, all industries will be analysed.
Next, I will present the correlation of returns in the cement industry.
Lastly, all the stocks in the KSE are analyzed, where the correlations are found for the highest volume.

Data Manipulation for Cement sector

library(lubridate)
library(dplyr)
library(tidyr)
library(xts)
library(quantmod)
library(PerformanceAnalytics)
library(RJSONIO)
library(TTR)
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.2.4

# Load the raw KSE OHLC export. The file is addressed by its full path rather
# than through setwd(), so the session's working directory is left untouched.
data_dir <- "Z:/Ferguson 13 march/other assignment/DS/Stock Market App/Data/OHLC"
kse <- read.csv(file.path(data_dir, "ksedata(full).csv"))

# Drop rows whose symbol field is the literal text "symbol"
# (presumably header lines repeated inside the export -- TODO confirm).
kse <- kse[kse$symbol != "symbol", ]

# Keep the first seven columns and coerce columns 3 to 7 to numeric. Going
# through as.character() first keeps the conversion correct when read.csv()
# returned factors.
kse <- kse[, 1:7]
kse[, 3:7] <- lapply(kse[, 3:7], function(x) as.numeric(as.character(x)))

# Parse the date column as day-month-year.
kse$date <- dmy(kse$date)

# Remove exact duplicate rows, then keep only the first row for each
# combination of the first two columns.
kse <- kse[!duplicated(kse), ]
kse <- kse[!duplicated(kse[, 1:2]), ]



# Cement-sector ticker symbols, written as one block of text with one ticker
# per line so the list is easy to paste and edit.
cement_tickers <- "ACPL
                     BWCL
                     CHCC
                     DBCI
                     DCL
                     DGKC
                     DNCC
                     FCCL
                     FECTC
                     FLYNG
                     GWLC
                     JVDC
                     JVDCPS
                     KOHC
                     LPCL
                     LUCK
                     MLCF
                     PAKCEM
                     PIOC
                     PIOCR
                     POWER
                     SMCPL
                     THCCL
                     ZELP"

# scan() splits the text on whitespace, giving a character vector with one
# element per ticker and no surrounding blanks.
cement_tickers <- scan(text = cement_tickers, what = character(), quiet = TRUE)

# Keep only the rows whose first column (the ticker) is a cement ticker.
kse2 <- kse[kse[[1]] %in% cement_tickers, , drop = TRUE]

# Reorder to columns 2, 1, 6, 7; the code below refers to them as
# date, symbol, close and volume.
kse2 <- kse2[c(2, 1, 6, 7)]

# Closing price over time for the whole cement sector: one panel per symbol,
# each with its own axis scales.
ggplot(kse2, aes(x = date, y = close, colour = symbol, group = symbol)) +
  geom_line() +
  facet_grid(symbol ~ ., scales = "free")

# Volume cut-offs used to split the sector into three groups: the 33rd and
# 67th percentiles and the maximum of the individual volume observations.
# na.rm = TRUE matches the NA handling of the per-symbol means below;
# without it quantile() stops with an error if any volume is missing.
quan <- quantile(kse2$volume, probs = c(.33, .67, 1), na.rm = TRUE)

# Wide table of closing prices built from the complete rows only.
kse3 <- na.omit(kse2)
kse3 <- spread(kse3, symbol, close)

# Wide table of volumes: one column per symbol. Column 2 (close) is left over
# as a key column by spread() and is dropped.
cement_volume <- spread(kse2, symbol, volume)
cement_volume <- cement_volume[, -2]

# Mean volume per symbol; the first column (date) is excluded. vapply() pins
# the result to one numeric value per column.
cement_mean <- vapply(
  cement_volume[-1],
  function(x) mean(x, na.rm = TRUE),
  numeric(1)
)

# Two-column summary: mean volume and the symbol it belongs to.
cement_mean <- data.frame(
  mean = unname(cement_mean),
  symbol = as.character(names(cement_mean)),
  stringsAsFactors = FALSE
)

Cement industry scrips in the lower volume tercile (mean volume below the 33rd percentile)

# Symbols whose mean volume lies below the first cut-off, quan[1].
score_low <- cement_mean[with(cement_mean, mean < quan[1]), ]
low_tickers <- unique(score_low$symbol)

# Price rows belonging to those symbols.
kse_low_volume <- kse2[kse2$symbol %in% low_tickers, ]

# Closing price over time, one free-scaled panel per symbol.
ggplot(
  kse_low_volume,
  aes(x = date, y = close, colour = symbol, group = symbol)
) +
  geom_line() +
  facet_grid(symbol ~ ., scales = "free")

Cement industry scrips in the middle volume tercile (mean volume between the 33rd and 67th percentiles)

# Symbols whose mean volume lies between the first and second cut-offs.
# Both bounds are tested against the numeric `mean` column. Comparing the
# whole data frame would also compare the character `symbol` column with the
# cut-off and produce a logical matrix instead of one flag per row.
# The lower bound is inclusive because the low-volume group is selected with
# a strict `< quan[1]`, so a mean equal to quan[1] belongs here.
score_middle <- cement_mean[cement_mean$mean >= quan[1] &
                              cement_mean$mean <= quan[2], ]

middle_tickers <- unique(score_middle$symbol)

# Price rows belonging to those symbols.
kse_middle_volume <- kse2[kse2$symbol %in% middle_tickers, ]

# Closing price over time, one free-scaled panel per symbol.
ggplot(kse_middle_volume, aes(date, close)) +
  geom_line(aes(colour = symbol, group = symbol)) +
  facet_grid(symbol ~ ., scales = "free")

Cement industry scrips in the upper volume tercile (mean volume above the 67th percentile)

# Symbols whose mean volume lies above the second cut-off, up to quan[3].
# Both bounds are tested against the numeric `mean` column. Comparing the
# whole data frame would also compare the character `symbol` column with the
# cut-off and produce a logical matrix instead of one flag per row.
# The lower bound is strict because the middle group is bounded above by
# quan[2] inclusive, so a mean equal to quan[2] is not counted twice.
score_high <- cement_mean[cement_mean$mean > quan[2] &
                            cement_mean$mean <= quan[3], ]

high_tickers <- unique(score_high$symbol)

# Price rows belonging to those symbols.
kse_high_volume <- kse2[kse2$symbol %in% high_tickers, ]

# Closing price over time, one free-scaled panel per symbol.
ggplot(kse_high_volume, aes(date, close)) +
  geom_line(aes(colour = symbol, group = symbol)) +
  facet_grid(symbol ~ ., scales = "free")

Correlation

Data Manipulation for Correlation

library(PerformanceAnalytics)
# Name: compute_pairwise_correlations
# Purpose: Converts prices to log returns, plots the pairwise relationships
#   between the return series and calculates their pairwise correlations.
# Inputs: prices = data.frame (or matrix) of prices. The first column is
#   dropped (callers in this file pass the date there); every remaining
#   column is treated as one price series.
# Output: correlation matrix of the log returns, computed with
#   use = "pairwise.complete.obs".
compute_pairwise_correlations <- function(prices) {
  # drop = FALSE keeps a two-dimensional object even when a single price
  # column remains; without it the subset collapses to a plain vector and
  # the column-wise return calculation fails.
  price_matrix <- as.matrix(prices[, -1, drop = FALSE])

  # Log returns. diff() on a matrix differences each column separately.
  returns <- diff(log(price_matrix))

  # Plot all the pairwise relationships. A scatter-plot matrix needs at
  # least two series to pair up, so the plot is skipped otherwise.
  if (ncol(returns) >= 2) {
    chart.Correlation(returns, main = "Pairwise return scatter plot")
  }

  # Pairwise correlations; each pair uses the rows where both are present.
  cor(returns, use = "pairwise.complete.obs")
}

Low volume correlation scrips in all KSE

`{r} # kse_low_volume <- kse_low_volume[,c(-4)] # kse_low_volume <- spread(kse_low_volume, symbol, close) # kse_low_correlation <- compute_pairwise_correlations(kse_low_volume) #`

Middle volume correlation scrips in all KSE

# Drop the fourth column (volume, given the column order chosen when kse2 was
# built -- TODO confirm) and reshape to one close-price column per symbol.
kse_middle_volume <- spread(kse_middle_volume[, -4], symbol, close)

# Scatter-plot matrix and correlation matrix for the middle-volume group.
kse_middle_correlation <- compute_pairwise_correlations(kse_middle_volume)

High volume scrips in all KSE

# Drop the fourth column (volume, given the column order chosen when kse2 was
# built -- TODO confirm) and reshape to one close-price column per symbol.
kse_high_volume <- spread(kse_high_volume[, -4], symbol, close)

# Scatter-plot matrix and correlation matrix for the high-volume group.
kse_high_correlation <- compute_pairwise_correlations(kse_high_volume)

Highest Correlation in all KSE

Data Manipulation

# Wide table of closing prices for every symbol: columns 2, 1 and 6 of kse
# (used here as date, symbol, close), spread to one column per symbol.
kse_cor <- kse[, c(2, 1, 6)]
kse_cor <- spread(kse_cor, symbol, close)

# Log returns per symbol. The first column (date) is excluded; diff() on a
# matrix differences each column separately.
returns <- diff(log(as.matrix(kse_cor[, -1, drop = FALSE])))
returns <- as.data.frame(returns)

# Names of the columns containing "KSE". Not used again in this file; note
# that the filter below only removes names that START with "KSE".
unwanted <- grep("KSE", colnames(returns), value = TRUE)
returns2 <- returns[, !grepl("^KSE", colnames(returns)), drop = FALSE]

# Keep the symbols for which fewer than 70% of the returns are missing.
# drop = FALSE keeps a data frame even if a single symbol survives.
enough_data <- vapply(
  returns2,
  function(x) sum(is.na(x)) / length(x) < .7,
  logical(1)
)
kse_return <- returns2[, enough_data, drop = FALSE]

# Pairwise correlations; the `use` option is spelled out in full instead of
# relying on the abbreviation "p".
correlation_matrix <- cor(kse_return, use = "pairwise.complete.obs")

library(caret)

## Loading required package: lattice

# Column indices returned by caret::findCorrelation() at a cutoff of 0.60.
high_cor <- findCorrelation(correlation_matrix, cutoff = 0.60)

# Returns of the selected symbols. drop = FALSE keeps a data frame even when
# only one column is selected.
high_data <- kse_return[, high_cor, drop = FALSE]

# Correlations over the rows with no missing value in any selected column;
# the `use` option is spelled out in full instead of the abbreviation "c".
correlation_matrix2 <- cor(high_data, use = "complete.obs")

Graph for all high correlation

# Plot and correlate the high-correlation group.
# NOTE(review): compute_pairwise_correlations() drops the first column of its
# input and log-differences the rest as prices, but correlation_matrix2 is
# already a correlation matrix of returns -- confirm this is the intended input.
all_high_correlation <- compute_pairwise_correlations(correlation_matrix2)

Presentation 1 (Algo Trading)

Azam Yahya

May 25, 2016

Data Manipulation for Cement sector

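Cement industry scrips in the lower volume tercile (mean volume below the 33rd percentile)

Cement industry scrips in the middle volume tercile (mean volume between the 33rd and 67th percentiles)

Cement industry scrips in the upper volume tercile (mean volume above the 67th percentile)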

Correlation

Data Manipulation for Correlation

Low volume correlation scrips in all KSE

`{r} # kse_low_volume <- kse_low_volume[,c(-4)] # kse_low_volume <- spread(kse_low_volume, symbol, close) # kse_low_correlation <- compute_pairwise_correlations(kse_low_volume) #`

Middle volume correlation scrips in all KSE

High volume scrips in all KSE

Highest Correlation in all KSE

Data Manipulation

Graph for all high correlation