rm(list=ls())

sim_files <- list.files(path="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/", pattern="*.json", full.names=T, recursive=T)

require(jsonlite)

j <- list()

count=0
for(i in sim_files){
  # count=count+1
  # print(print(count/length(sim_files)))
  j <- c(j,fromJSON(i))
  # print(length(j))
}


##

layer0 <- sapply(j, `[[`,"layer 0")

nSigmaPion0 <- sapply(j, `[[`,"nSigmaPion")

P0 <- sapply(j, `[[`,"P")

Eta0 <- sapply(j, `[[`,"Eta")

n <- unique(c(which(sapply(layer0,typeof)=="list"),which(sapply(layer0, is.null))))

length(n)

layer0 <- layer0[-n]
nSigmaPion0 <- nSigmaPion0[-n]
P0 <- P0[-n]
Eta0 <- Eta0[-n]

for(i in 1:length(layer0)){
  layer0[[i]][which(layer0[[i]]==-7169)] <- 0
}

##

layer1 <- sapply(j, `[[`,"layer 1")

nSigmaPion1 <- sapply(j, `[[`,"nSigmaPion")

P1 <- sapply(j, `[[`,"P")

Eta1 <- sapply(j, `[[`,"Eta")


n <- unique(c(which(sapply(layer1,typeof)=="list"),which(sapply(layer1, is.null))))

length(n)

layer1 <- layer1[-n]
nSigmaPion1 <- nSigmaPion1[-n]
P1 <- P1[-n]
Eta1 <- Eta1[-n]

for(i in 1:length(layer1)){
  layer1[[i]][which(layer1[[i]]==-7169)] <- 0
}

##

layer2 <- sapply(j, `[[`,"layer 2")

nSigmaPion2 <- sapply(j, `[[`,"nSigmaPion")

P2 <- sapply(j, `[[`,"P")

Eta2 <- sapply(j, `[[`,"Eta")

n <- unique(c(which(sapply(layer2,typeof)=="list"),which(sapply(layer2, is.null))))

length(n)

layer2 <- layer2[-n]
nSigmaPion2 <- nSigmaPion2[-n]
P2 <- P2[-n]
Eta2 <- Eta2[-n]

for(i in 1:length(layer2)){
  layer2[[i]][which(layer2[[i]]==-7169)] <- 0
}

##

layer3 <- sapply(j, `[[`,"layer 3")

nSigmaPion3 <- sapply(j, `[[`,"nSigmaPion")

P3 <- sapply(j, `[[`,"P")
Eta3 <- sapply(j, `[[`,"Eta")


n <- unique(c(which(sapply(layer3,typeof)=="list"),which(sapply(layer3, is.null))))

length(n)

layer3 <- layer3[-n]
nSigmaPion3 <- nSigmaPion3[-n]
P3 <- P3[-n]
Eta3 <- Eta3[-n]

for(i in 1:length(layer3)){
  layer3[[i]][which(layer3[[i]]==-7169)] <- 0
}

##

layer4 <- sapply(j, `[[`,"layer 4")

nSigmaPion4 <- sapply(j, `[[`,"nSigmaPion")

P4 <- sapply(j, `[[`,"P")

Eta4 <- sapply(j, `[[`,"Eta")

n <- unique(c(which(sapply(layer4,typeof)=="list"),which(sapply(layer4, is.null))))

length(n)

layer4 <- layer4[-n]
nSigmaPion4 <- nSigmaPion4[-n]
P4 <- P4[-n]
Eta4 <- Eta4[-n]

for(i in 1:length(layer4)){
  layer4[[i]][which(layer4[[i]]==-7169)] <- 0
}

##

layer5 <- sapply(j, `[[`,"layer 5")

nSigmaPion5 <- sapply(j, `[[`,"nSigmaPion")

P5 <- sapply(j, `[[`,"P")

Eta5 <- sapply(j, `[[`,"Eta")

n <- unique(c(which(sapply(layer5,typeof)=="list"),which(sapply(layer5, is.null))))

length(n)

layer5 <- layer5[-n]
nSigmaPion5 <- nSigmaPion5[-n]
P5 <- P5[-n]
Eta5 <- Eta5[-n]

for(i in 1:length(layer5)){
  layer5[[i]][which(layer5[[i]]==-7169)] <- 0
}

##

rm(j)

sim_x <- c(layer0,layer1,layer2,layer3,layer4,layer5)

rm(layer0,layer1,layer2,layer3,layer4,layer5)

for(i in 1:length(sim_x)){
  dim(sim_x[[i]]) <- c(1,17,24)
  # print(i/length(sim_x))
  
}

require(abind)

sim_x <- abind(sim_x,along=1)



nsigmaPion <- c(nSigmaPion0,nSigmaPion1,nSigmaPion2,nSigmaPion3,nSigmaPion4,nSigmaPion5)
nsigmaPion <- as.numeric(unlist(nsigmaPion))

length(nsigmaPion)==dim(sim_x)[1]

rm(nSigmaPion0,nSigmaPion1,nSigmaPion2,nSigmaPion3,nSigmaPion4,nSigmaPion5)

P <- c(P0,P1,P2,P3,P4,P5)
P <- as.numeric(unlist(P))

length(P)==dim(sim_x)[1]

rm(P0,P1,P2,P3,P4,P5)

sim_P <- P

rm(P)

Eta <- c(Eta0,Eta1,Eta2,Eta3,Eta4,Eta5)
Eta <- as.numeric(unlist(Eta))

length(Eta)==dim(sim_x)[1]

rm(Eta0,Eta1,Eta2,Eta3,Eta4,Eta5)

sim_Eta <- Eta

rm(Eta)

sim_nsig <- nsigmaPion
rm(nsigmaPion)

save(sim_x,file="c:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_x_full.rdata")
save(sim_nsig,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_nsig.rdata")
save(sim_P,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_P.rdata")
save(sim_Eta,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_Eta.rdata")

rm(list=ls())

real_files <- list.files(path="C:/Users/gerhard/Documents/msc-thesis-data/processed/000265343/", pattern="*.json", full.names=T, recursive=T)

require(jsonlite)

j <- list()
count <- 0
for(i in real_files){
  # count=count+1
  # print(count/length(real_files))
  j <- c(j,fromJSON(i))
  # print(length(j))
}


##

layer0 <- sapply(j, `[[`,"layer 0")

nSigmaPion0 <- sapply(j, `[[`,"nSigmaPion")
pdgCode0 <- sapply(j, `[[`,"pdgCode")

P0 <- sapply(j, `[[`,"P")

Eta0 <- sapply(j, `[[`,"Eta")



n <- unique(c(which(sapply(layer0,typeof)=="list"),which(sapply(layer0, is.null))))

length(n)

layer0 <- layer0[-n]
nSigmaPion0 <- nSigmaPion0[-n]
pdgCode0 <- pdgCode0[-n]
P0 <- P0[-n]
Eta0 <- Eta0[-n]

# for(i in 1:length(layer0)){
#   layer0[[i]][which(layer0[[i]]==-7169)] <- 0
# }

##

layer1 <- sapply(j, `[[`,"layer 1")

nSigmaPion1 <- sapply(j, `[[`,"nSigmaPion")
pdgCode1 <- sapply(j, `[[`,"pdgCode")
P1 <- sapply(j, `[[`,"P")
Eta1 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer1,typeof)=="list"),which(sapply(layer1, is.null))))

length(n)

layer1 <- layer1[-n]
nSigmaPion1 <- nSigmaPion1[-n]
pdgCode1 <- pdgCode1[-n]
P1 <- P1[-n]
Eta1 <- Eta1[-n]

# for(i in 1:length(layer1)){
#   layer1[[i]][which(layer1[[i]]==-7169)] <- 0
# }

##

layer2 <- sapply(j, `[[`,"layer 2")

nSigmaPion2 <- sapply(j, `[[`,"nSigmaPion")
pdgCode2 <- sapply(j, `[[`,"pdgCode")
P2 <- sapply(j, `[[`,"P")
Eta2 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer2,typeof)=="list"),which(sapply(layer2, is.null))))

length(n)

layer2 <- layer2[-n]
pdgCode2 <- pdgCode2[-n]
nSigmaPion2 <- nSigmaPion2[-n]
P2 <- P2[-n]
Eta2 <- Eta2[-n]

# for(i in 1:length(layer2)){
#   layer2[[i]][which(layer2[[i]]==-7169)] <- 0
# }

##

layer3 <- sapply(j, `[[`,"layer 3")

nSigmaPion3 <- sapply(j, `[[`,"nSigmaPion")
pdgCode3 <- sapply(j, `[[`,"pdgCode")
P3 <- sapply(j, `[[`,"P")
Eta3 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer3,typeof)=="list"),which(sapply(layer3, is.null))))

length(n)

layer3 <- layer3[-n]
pdgCode3 <- pdgCode3[-n]
nSigmaPion3 <- nSigmaPion3[-n]
P3 <- P3[-n]
Eta3 <- Eta3[-n]

# for(i in 1:length(layer3)){
#   layer3[[i]][which(layer3[[i]]==-7169)] <- 0
# }

##

layer4 <- sapply(j, `[[`,"layer 4")

nSigmaPion4 <- sapply(j, `[[`,"nSigmaPion")
pdgCode4 <- sapply(j, `[[`,"pdgCode")
P4 <- sapply(j, `[[`,"P")
Eta4 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer4,typeof)=="list"),which(sapply(layer4, is.null))))

length(n)

layer4 <- layer4[-n]
pdgCode4 <- pdgCode4[-n]
nSigmaPion4 <- nSigmaPion4[-n]
P4 <- P4[-n]
Eta4 <- Eta4[-n]

# for(i in 1:length(layer4)){
#   layer4[[i]][which(layer4[[i]]==-7169)] <- 0
# }

##

layer5 <- sapply(j, `[[`,"layer 5")

nSigmaPion5 <- sapply(j, `[[`,"nSigmaPion")
pdgCode5 <- sapply(j, `[[`,"pdgCode")
P5 <- sapply(j, `[[`,"P")
Eta5 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer5,typeof)=="list"),which(sapply(layer5, is.null))))

length(n)

layer5 <- layer5[-n]
pdgCode5 <- pdgCode5[-n]
nSigmaPion5 <- nSigmaPion5[-n]
P5 <- P5[-n]
Eta5 <- Eta5[-n]

# for(i in 1:length(layer5)){
#   layer5[[i]][which(layer5[[i]]==-7169)] <- 0
# }

##

rm(j)

real_x <- c(layer0,layer1,layer2,layer3,layer4,layer5)

rm(layer0,layer1,layer2,layer3,layer4,layer5)

for(i in 1:length(real_x)){
  dim(real_x[[i]]) <- c(1,17,24)
  # print(i/length(real_x))
  
}

require(abind)

real_x <- abind(real_x,along=1)



nsigmaPion <- c(nSigmaPion0,nSigmaPion1,nSigmaPion2,nSigmaPion3,nSigmaPion4,nSigmaPion5)
nsigmaPion <- as.numeric(unlist(nsigmaPion))

pdgCode <- c(pdgCode0,pdgCode1,pdgCode2,pdgCode3,pdgCode4,pdgCode5)
pdgCode <- as.numeric(unlist(pdgCode))

rm(nSigmaPion0,nSigmaPion1,nSigmaPion2,nSigmaPion3,nSigmaPion4,nSigmaPion5)
rm(pdgCode0,pdgCode1,pdgCode2,pdgCode3,pdgCode4,pdgCode5)

P <- c(P0,P1,P2,P3,P4,P5)
P <- as.numeric(unlist(P))

# length(P)==dim(real_x)[1]

rm(P0,P1,P2,P3,P4,P5)

real_P <- P

rm(P)

Eta <- c(Eta0,Eta1,Eta2,Eta3,Eta4,Eta5)
Eta <- as.numeric(unlist(Eta))

# length(Eta)==dim(sim_x)[1]

rm(Eta0,Eta1,Eta2,Eta3,Eta4,Eta5)

real_Eta <- Eta

rm(Eta)

pions <- which(abs(pdgCode)==211)

nsigmaPion <- nsigmaPion[pions]
real_x <- real_x[pions,,]
real_Eta <- real_Eta[pions]
real_P <- real_P[pions]

length(nsigmaPion)==dim(real_x)[1]

real_nsig <- nsigmaPion

rm(nsigmaPion)

save(real_P,file="c:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_P.rdata")
save(real_x,file="c:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_x_full.rdata")
save(real_nsig,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_nsig.rdata")
save(real_P,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_P.rdata")
save(real_Eta,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_Eta.rdata")
save(pdgCode,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_pdg.rdata")
rm(list=ls())

load("c:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_x_full.rdata")
load("C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_nsig.rdata")
load("C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_P.rdata")
load("C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_Eta.rdata")

load("c:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_P.rdata")
load("c:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_x_full.rdata")
load("C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_nsig.rdata")
load("C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_Eta.rdata")
dim(sim_x)
dim(real_x)
require(ggplot2)

Eta <- data.frame(rbind(cbind(real_Eta,"real"),cbind(sim_Eta,"sim")))

names(Eta) <- c("Eta","real_or_fake")

Eta$Eta <- as.numeric(as.character(Eta$Eta))
Eta$real_or_fake <- as.factor(Eta$real_or_fake)

ggplot(Eta,aes(Eta,fill=real_or_fake,colour=real_or_fake,alpha=0.2
                      ))+geom_freqpoly(bins=1000)

require(ggplot2)

P <- data.frame(rbind(cbind(real_P,"real"),cbind(sim_P,"sim")))

names(P) <- c("P","real_or_fake")

P$P <- as.numeric(as.character(P$P))
P$real_or_fake <- as.factor(P$real_or_fake)

ggplot(P,aes(P,fill=real_or_fake,colour=real_or_fake,alpha=0.2))+geom_freqpoly(bins=1000)

P <- data.frame(P)

P1 <- P[P$P<=20,]

ggplot(P1,aes(P,fill=real_or_fake,colour=real_or_fake,alpha=0.2))+geom_freqpoly(bins=1000)

require(ggplot2)

nsig <- data.frame(rbind(cbind(real_nsig,"real"),cbind(sim_nsig,"sim")))

names(nsig) <- c("nsig","real_or_fake")

nsig$nsig <- as.numeric(as.character(nsig$nsig))
nsig$real_or_fake <- as.factor(nsig$real_or_fake)

ggplot(nsig,aes(nsig,fill=real_or_fake,colour=real_or_fake,alpha=0.2
                      ))+geom_freqpoly(bins=1000)

nsig2 <- nsig[abs(nsig$nsig)<=3,,]

ggplot(nsig2,aes(nsig,fill=real_or_fake,colour=real_or_fake,alpha=0.2
                      ))+geom_freqpoly(bins=1000)

require(abind)
x <- abind(real_x,sim_x,along=1)
y <- c(rep(1,dim(real_x)[1]),rep(0,dim(sim_x)[1]))

x <- x[abs(Eta$Eta)<=.9,,]

nsig <- nsig[(abs(Eta$Eta))<=.9,]

P <- P[(abs(Eta$Eta))<=.9,]

y <- y[(abs(Eta$Eta))<=.9]

x <- x[P$P<=20,,]

nsig <- nsig[P$P<=20,]

y <- y[P$P<=20]

x <- x[abs(nsig$nsig)<=3,,]

y <- y[abs(nsig$nsig)<=3]


rm(real_Eta,real_nsig,real_P,sim_Eta,sim_nsig,sim_P,real_x,sim_x,Eta,nsig,P,P1)

rm(real_x,sim_x)

require(keras)

dim(x) <- c(dim(x),1)

x <- (x-max(x))/max(x)

train_ind <- sample(1:length(y),size=round(0.75*length(y)))

x_train <- x[train_ind,,,]
y_train <- y[train_ind]

x_test <- x[-train_ind,,,]
y_test <- y[-train_ind]

dim(x_train) <- c(dim(x_train),1)
dim(x_test) <- c(dim(x_test),1)
model <- keras_model_sequential() %>%
  layer_conv_2d(filters=1,kernel_size=c(17,24))%>%#,activation="relu") %>%
  # layer_max_pooling_2d() %>%
  # layer_dropout(rate=0.2) %>%
  # layer_conv_2d(filters=32,kernel_size=c(3,3),activation="relu") %>%
  # layer_max_pooling_2d() %>%
  # layer_dropout(rate=0.2) %>%
  layer_flatten() %>%
  layer_dense(1,"softmax")

model %>%
  compile(
    loss="binary_crossentropy",
    optimizer_sgd(0.000001),
    metrics="acc"
    
  )
  
history <- model %>%
  fit(x_train,
      y_train,
      batch_size=32,
      verbose=2,
      epochs=20,
      validation_data=list(x_test,y_test),
      shuffle=TRUE)

png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real1.png")
plot(history)
dev.off()

rm(model)


model <- keras_model_sequential() %>%
  layer_flatten() %>%
  layer_dense(512,"relu") %>%
  layer_dense(256,"relu") %>%
  layer_dense(128,"relu") %>%
  layer_dense(1,"sigmoid")

model %>%
  compile(
    loss="binary_crossentropy",
    optimizer_sgd(0.000001),
    metrics="acc"
    
  )
  
history <- model %>%
  fit(x_train,
      y_train,
      batch_size=32,
      verbose=2,
      epochs=20,
      validation_data=list(x_test,y_test),
      shuffle=TRUE)

png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real2.png")
plot(history)
dev.off()

rm(model)


model <- keras_model_sequential() %>%
  layer_flatten() %>%
  layer_dense(32,"relu") %>%
  layer_dense(1,"sigmoid")

model %>%
  compile(
    loss="binary_crossentropy",
    optimizer_sgd(0.01),
    metrics="acc"
    
  )
  
history <- model %>%
  fit(x_train,
      y_train,
      batch_size=8,
      verbose=2,
      epochs=20,
      validation_data=list(x_test,y_test),
      shuffle=TRUE)

png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real3.png")
plot(history)
dev.off()

barplot(table(y_train))
# barplot(table(y_test))

real <- which(y==1)
fake <- which(y!=1)

i <- c(fake,sample(real,length(fake),replace=F))

x <- x[i,,,]
y <- y[i]
dim(x) <- c(dim(x),1)

barplot(table(y))


train_ind <- sample(1:length(y),size=round(0.75*length(y)))

x_train <- x[train_ind,,,]
y_train <- y[train_ind]

x_test <- x[-train_ind,,,]
y_test <- y[-train_ind]

final_test_ind <- sample(1:length(y_train),size=round(0.1*length(y_train)))

x_train <- x[-final_test_ind,,,]
y_train <- y[-final_test_ind]

x_final_test <- x[final_test_ind,,,]
y_final_test <- y[final_test_ind]


dim(x_train) <- c(dim(x_train),1)
dim(x_test) <- c(dim(x_test),1)
dim(x_final_test) <- c(dim(x_final_test),1)

rm(x,y)

model <- keras_model_sequential() %>%
  layer_conv_2d(32, kernel_size = c(3,2)) %>%
  layer_max_pooling_2d() %>%
  layer_flatten() %>%
  layer_dense(128,"tanh") %>%
  layer_dense(1, "sigmoid")

model %>%
  compile(
    loss="binary_crossentropy",
    optimizer_adam(0.001),
    metrics="acc"
    
  )
  
history <- model %>%
  fit(x_train,
      y_train,
      batch_size=8,
      verbose=2,
      epochs=20,
      validation_data=list(x_test,y_test),
      shuffle=TRUE)

png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real4.png")
plot(history)
dev.off()

model <- keras_model_sequential() %>%
  layer_conv_2d(16, kernel_size = c(2,3)) %>%
  layer_max_pooling_2d() %>%
  layer_conv_2d(32, kernel_size = c(3,3)) %>%
  layer_max_pooling_2d() %>%
  layer_flatten() %>%
  layer_dense(128,"tanh") %>%
  layer_dense(1, "sigmoid")

model %>%
  compile(
    loss="binary_crossentropy",
    optimizer_adam(0.001),
    metrics="acc"
    
  )
  
history <- model %>%
  fit(x_train,
      y_train,
      batch_size=8,
      verbose=2,
      epochs=20,
      validation_data=list(x_test,y_test),
      shuffle=TRUE)

png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real5.png")
plot(history)
dev.off()

model <- keras_model_sequential() %>%
  layer_conv_2d(16, kernel_size = c(3,3),input_shape=c(17,24,1)) %>%
  layer_max_pooling_2d() %>%
  layer_conv_2d(32, kernel_size = c(3,3)) %>%
  layer_max_pooling_2d() %>%
  layer_flatten() %>%
  layer_dense(256,"tanh") %>%
  layer_dense(128,"tanh") %>%
  layer_dense(1, "sigmoid")

model %>%
  compile(
    loss="binary_crossentropy",
    optimizer_adam(0.001),
    metrics="acc"
    
  )
  
history <- model %>%
  fit(x_train,
      y_train,
      batch_size=8,
      verbose=2,
      epochs=20,
      validation_data=list(x_test,y_test),
      shuffle=TRUE)

png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real6.png")
plot(history,"C:/Users/gerhard/documents/MSc-thesis/NEW/ML/model6.h5")
dev.off()

model <- keras_model_sequential() %>%
  layer_conv_2d(16, kernel_size = c(2,3),activation = "tanh") %>%
  layer_max_pooling_2d() %>%
  layer_conv_2d(32, kernel_size = c(3,3),activation = "tanh") %>%
  layer_max_pooling_2d() %>%
  layer_flatten() %>%
  layer_dense(128,"tanh") %>%
  layer_dropout(rate=0.1) %>%
  layer_dense(128,"tanh") %>%
  layer_dense(1, "sigmoid")

model %>%
  compile(
    loss="binary_crossentropy",
    optimizer_adam(0.001),
    metrics="acc"
    
  )
  
history <- model %>%
  fit(x_train,
      y_train,
      batch_size=8,
      verbose=2,
      epochs=20,
      validation_data=list(x_test,y_test),
      shuffle=TRUE)

png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real7.png")
plot(history)
dev.off()

preds <- model %>%
  predict(x_final_test)

pred_act <- data.frame(cbind(preds,y_final_test))

pred_act$pred <- ifelse(pred_act$V1>=0.5,1,0)

require(caret)

require(pander)
  
caret::confusionMatrix(data=factor(pred_act$pred),reference = factor(pred_act$y_final_test))