This is the first task pertaining to algorithmic trading. It involves trying to find patterns through visualization. The document is organized as follows:

Data Manipulation for the Cement Sector

library(lubridate)
library(dplyr)
library(tidyr)
library(xts)
library(quantmod)
library(PerformanceAnalytics)
library(RJSONIO)
library(TTR)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
# Load the raw KSE export from the project data directory.
setwd("Z:/Ferguson 13 march/other assignment/DS/Stock Market App/Data/OHLC")
kse <- read.csv("ksedata(full).csv")

# Discard every row whose symbol field is the literal text "symbol"
# (presumably header lines repeated inside the file - confirm against the
# CSV) and keep only the first seven columns.
kse <- kse[kse$symbol != "symbol", 1:7]

# Columns 3 to 7 become numeric. Going through as.character() first means a
# factor column is converted from its labels rather than its level codes.
kse[, 3:7] <- lapply(kse[, 3:7], function(column) {
  as.numeric(as.character(column))
})

# Parse the day-month-year text into dates.
kse$date <- dmy(kse$date)

# Keep the first row seen for each combination of the first two columns.
# Rows that are identical in every column necessarily share those two
# values, so this single pass also removes exact duplicate rows.
kse <- kse[!duplicated(kse[, 1:2]), ]



# Symbols of the cement-sector companies to extract from the full data set.
# Written as a plain character vector so nothing has to be split and trimmed
# at run time.
cement_tickers <- c(
  "ACPL", "BWCL", "CHCC", "DBCI", "DCL", "DGKC", "DNCC", "FCCL",
  "FECTC", "FLYNG", "GWLC", "JVDC", "JVDCPS", "KOHC", "LPCL", "LUCK",
  "MLCF", "PAKCEM", "PIOC", "PIOCR", "POWER", "SMCPL", "THCCL", "ZELP"
)

# Keep the rows whose first column is one of the cement tickers, and keep
# columns 2, 1, 6 and 7 of `kse` in that order (referred to below and in the
# later sections as date, symbol, close and volume). Plain bracket indexing
# always returns a data frame here; the previous subset(..., drop = TRUE)
# would have returned a list if exactly one row had matched.
kse2 <- kse[kse[[1]] %in% cement_tickers, c(2, 1, 6, 7)]

# plot all cement industry
# One line chart of close against date per symbol, stacked vertically, with
# free axis scales so each panel uses its own range.
ggplot(kse2, aes(date, close)) +
  geom_line(aes(colour = symbol, group = symbol)) +
  facet_grid(symbol ~ ., scales = "free")

# plot by volume quantile
# Cut points at the 33rd, 67th and 100th percentiles of the row-level volume
# of the cement subset. na.rm = TRUE stops quantile() from aborting when a
# volume is missing (e.g. a value that could not be converted to a number).
# NOTE(review): these cut points are taken over individual rows but are
# compared further down against per-symbol *mean* volumes - confirm that
# this is the intended grouping rule.
quan <- quantile(kse2$volume, probs = c(0.33, 0.67, 1), na.rm = TRUE)

# Wide table of closing prices (one column per symbol) built from the rows
# that have no missing values.
kse3 <- spread(na.omit(kse2), symbol, close)

# Wide table of volumes with one column per symbol. Column 2 of the spread
# result (the leftover close column) is dropped so that only the first column
# and the per-symbol volume columns remain.
cement_volume <- spread(kse2, symbol, volume)
cement_volume <- cement_volume[, -2]

# Mean volume of every symbol column, ignoring missing values. vapply()
# guarantees a numeric vector even when there are no symbol columns.
cement_mean <- data.frame(
  mean = vapply(cement_volume[-1], mean, numeric(1), na.rm = TRUE),
  symbol = colnames(cement_volume)[-1],
  stringsAsFactors = FALSE
)
rownames(cement_mean) <- NULL

Cement industry scrips that are in the lower volume tercile

# Symbols whose mean volume lies below the first cut point in `quan`.
score_low <- cement_mean[cement_mean[["mean"]] < quan[1], ]
low_tickers <- unique(score_low[["symbol"]])

# Price/volume rows of those symbols.
kse_low_volume <- kse2[kse2[["symbol"]] %in% low_tickers, ]

# One panel per symbol: close against date, each panel with its own scales.
ggplot(kse_low_volume, aes(date, close)) +
  geom_line(aes(colour = symbol, group = symbol)) +
  facet_grid(symbol ~ ., scales = "free")

Cement industry scrips that are in the middle volume tercile

# Symbols whose mean volume lies between the first and second cut points in
# `quan`. Both conditions test the `mean` column: comparing the whole data
# frame (as the second condition used to) yields a logical matrix that is
# recycled against the first condition and is not a valid row selector.
# The lower bound is inclusive so that a mean exactly equal to quan[1], which
# the low-volume group excludes with `<`, is not left out of every group.
score_middle <- cement_mean[cement_mean$mean >= quan[1] &
                              cement_mean$mean <= quan[2], ]

middle_tickers <- unique(score_middle$symbol)

# Price/volume rows of those symbols.
kse_middle_volume <- kse2[kse2$symbol %in% middle_tickers, ]

# One panel per symbol: close against date, each panel with its own scales.
ggplot(kse_middle_volume, aes(date, close)) +
  geom_line(aes(colour = symbol, group = symbol)) +
  facet_grid(symbol ~ ., scales = "free")

Cement industry scrips that are in the upper volume tercile

# Symbols whose mean volume lies above the second cut point in `quan` and
# not above the third. Both conditions test the `mean` column: comparing the
# whole data frame (as the second condition used to) yields a logical matrix
# that is recycled against the first condition and is not a valid row
# selector. The lower bound is strict because the middle group already
# includes means equal to quan[2].
score_high <- cement_mean[cement_mean$mean > quan[2] &
                            cement_mean$mean <= quan[3], ]

high_tickers <- unique(score_high$symbol)

# Price/volume rows of those symbols.
kse_high_volume <- kse2[kse2$symbol %in% high_tickers, ]

# One panel per symbol: close against date, each panel with its own scales.
ggplot(kse_high_volume, aes(date, close)) +
  geom_line(aes(colour = symbol, group = symbol)) +
  facet_grid(symbol ~ ., scales = "free")

Correlation

Data Manipulation for Correlation

library(PerformanceAnalytics)
# Name: compute_pairwise_correlations
# Purpose: Converts a table of prices to log returns, draws the pairwise
#          scatter-plot matrix of those returns with chart.Correlation(),
#          and returns their correlation matrix.
# Inputs:  prices = data.frame (or matrix) whose first column is skipped and
#          whose remaining columns are each treated as one price series.
#          At least two price series are required.
# Output:  correlation matrix of the log returns, computed on pairwise
#          complete observations.
# Errors:  stops with a clear message when fewer than two price series are
#          supplied.
compute_pairwise_correlations <- function(prices) {
  # drop = FALSE keeps the two-dimensional shape even if a single series is
  # left, so the problem is reported below instead of failing inside the
  # return calculation with an unrelated message.
  series <- prices[, -1, drop = FALSE]
  if (ncol(series) < 2) {
    stop("`prices` must contain at least two price columns after the ",
         "first column", call. = FALSE)
  }

  # Convert prices to returns: row-to-row difference of the log price,
  # computed for all columns at once on the numeric matrix.
  returns <- diff(log(as.matrix(series)))

  # Plot all the pairwise relationships
  chart.Correlation(returns, main = "Pairwise return scatter plot")

  # Compute the pairwise correlations
  cor(returns, use = "pairwise.complete.obs")
}

Correlation of low-volume scrips in the KSE cement sector

{r}
# kse_low_volume <- kse_low_volume[,c(-4)]
# kse_low_volume <- spread(kse_low_volume, symbol, close)
# kse_low_correlation <- compute_pairwise_correlations(kse_low_volume)
#

Correlation of middle-volume scrips in the KSE cement sector

# Drop the fourth column (presumably volume - it is the last of the four
# columns kept when the cement subset was built) and pivot the close prices
# to one column per symbol. Note that this overwrites the long-format table.
kse_middle_volume <- spread(kse_middle_volume[, -4], symbol, close)

# Chart the pairwise return relationships and keep the correlation matrix.
kse_middle_correlation <- compute_pairwise_correlations(kse_middle_volume)

Correlation of high-volume scrips in the KSE cement sector

# Drop the fourth column (presumably volume - it is the last of the four
# columns kept when the cement subset was built) and pivot the close prices
# to one column per symbol. Note that this overwrites the long-format table.
kse_high_volume <- spread(kse_high_volume[, -4], symbol, close)

# Chart the pairwise return relationships and keep the correlation matrix.
kse_high_correlation <- compute_pairwise_correlations(kse_high_volume)

Highest Correlation in all KSE

Data Manipulation

# Wide table of closing prices for the whole data set: columns 2, 1 and 6 of
# `kse` are taken, then close is spread into one column per symbol.
kse_cor <- spread(kse[, c(2, 1, 6)], symbol, close)

# Log returns per symbol: row-to-row difference of the log close, computed
# on every column except the first.
returns <- as.data.frame(diff(log(as.matrix(kse_cor[, -1, drop = FALSE]))))

# Columns whose name starts with "KSE" are excluded (presumably index series
# rather than individual companies - confirm). The same anchored pattern now
# defines `unwanted` and drives the filter; previously `unwanted` used an
# unanchored pattern and was never used.
unwanted <- grep("^KSE", colnames(returns), value = TRUE)
returns2 <- returns[, !(colnames(returns) %in% unwanted), drop = FALSE]

# Keep only the symbols for which fewer than 70% of the returns are missing.
kse_return <- returns2[
  ,
  vapply(returns2, function(x) mean(is.na(x)) < 0.7, logical(1)),
  drop = FALSE
]
correlation_matrix <- cor(kse_return, use = "pairwise.complete.obs")

library(caret)
# findCorrelation() reports the column indices caret flags because of
# pairwise correlations above the cutoff; those columns are treated here as
# the highly correlated set. drop = FALSE keeps a data frame even if only a
# single column is flagged.
high_cor <- findCorrelation(correlation_matrix, cutoff = 0.60)
high_data <- kse_return[, high_cor, drop = FALSE]
correlation_matrix2 <- cor(high_data, use = "complete.obs")

Graph for all high correlations

# compute_pairwise_correlations() expects a table of prices: it skips the
# first column and converts the remaining columns to log returns. Passing the
# correlation matrix itself made it discard one symbol and take log
# differences of correlation coefficients. Hand it the price columns of the
# flagged symbols instead, preceded by the first column of the price table.
all_high_correlation <- compute_pairwise_correlations(
  kse_cor[, c(colnames(kse_cor)[1], colnames(kse_return)[high_cor])]
)