rm(list=ls())
sim_files <- list.files(path="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/", pattern="*.json", full.names=T, recursive=T)
require(jsonlite)
j <- list()
count=0
for(i in sim_files){
# count=count+1
# print(print(count/length(sim_files)))
j <- c(j,fromJSON(i))
# print(length(j))
}
##
layer0 <- sapply(j, `[[`,"layer 0")
nSigmaPion0 <- sapply(j, `[[`,"nSigmaPion")
P0 <- sapply(j, `[[`,"P")
Eta0 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer0,typeof)=="list"),which(sapply(layer0, is.null))))
length(n)
layer0 <- layer0[-n]
nSigmaPion0 <- nSigmaPion0[-n]
P0 <- P0[-n]
Eta0 <- Eta0[-n]
for(i in 1:length(layer0)){
layer0[[i]][which(layer0[[i]]==-7169)] <- 0
}
##
layer1 <- sapply(j, `[[`,"layer 1")
nSigmaPion1 <- sapply(j, `[[`,"nSigmaPion")
P1 <- sapply(j, `[[`,"P")
Eta1 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer1,typeof)=="list"),which(sapply(layer1, is.null))))
length(n)
layer1 <- layer1[-n]
nSigmaPion1 <- nSigmaPion1[-n]
P1 <- P1[-n]
Eta1 <- Eta1[-n]
for(i in 1:length(layer1)){
layer1[[i]][which(layer1[[i]]==-7169)] <- 0
}
##
layer2 <- sapply(j, `[[`,"layer 2")
nSigmaPion2 <- sapply(j, `[[`,"nSigmaPion")
P2 <- sapply(j, `[[`,"P")
Eta2 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer2,typeof)=="list"),which(sapply(layer2, is.null))))
length(n)
layer2 <- layer2[-n]
nSigmaPion2 <- nSigmaPion2[-n]
P2 <- P2[-n]
Eta2 <- Eta2[-n]
for(i in 1:length(layer2)){
layer2[[i]][which(layer2[[i]]==-7169)] <- 0
}
##
layer3 <- sapply(j, `[[`,"layer 3")
nSigmaPion3 <- sapply(j, `[[`,"nSigmaPion")
P3 <- sapply(j, `[[`,"P")
Eta3 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer3,typeof)=="list"),which(sapply(layer3, is.null))))
length(n)
layer3 <- layer3[-n]
nSigmaPion3 <- nSigmaPion3[-n]
P3 <- P3[-n]
Eta3 <- Eta3[-n]
for(i in 1:length(layer3)){
layer3[[i]][which(layer3[[i]]==-7169)] <- 0
}
##
layer4 <- sapply(j, `[[`,"layer 4")
nSigmaPion4 <- sapply(j, `[[`,"nSigmaPion")
P4 <- sapply(j, `[[`,"P")
Eta4 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer4,typeof)=="list"),which(sapply(layer4, is.null))))
length(n)
layer4 <- layer4[-n]
nSigmaPion4 <- nSigmaPion4[-n]
P4 <- P4[-n]
Eta4 <- Eta4[-n]
for(i in 1:length(layer4)){
layer4[[i]][which(layer4[[i]]==-7169)] <- 0
}
##
layer5 <- sapply(j, `[[`,"layer 5")
nSigmaPion5 <- sapply(j, `[[`,"nSigmaPion")
P5 <- sapply(j, `[[`,"P")
Eta5 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer5,typeof)=="list"),which(sapply(layer5, is.null))))
length(n)
layer5 <- layer5[-n]
nSigmaPion5 <- nSigmaPion5[-n]
P5 <- P5[-n]
Eta5 <- Eta5[-n]
for(i in 1:length(layer5)){
layer5[[i]][which(layer5[[i]]==-7169)] <- 0
}
##
rm(j)
sim_x <- c(layer0,layer1,layer2,layer3,layer4,layer5)
rm(layer0,layer1,layer2,layer3,layer4,layer5)
for(i in 1:length(sim_x)){
dim(sim_x[[i]]) <- c(1,17,24)
# print(i/length(sim_x))
}
require(abind)
sim_x <- abind(sim_x,along=1)
nsigmaPion <- c(nSigmaPion0,nSigmaPion1,nSigmaPion2,nSigmaPion3,nSigmaPion4,nSigmaPion5)
nsigmaPion <- as.numeric(unlist(nsigmaPion))
length(nsigmaPion)==dim(sim_x)[1]
rm(nSigmaPion0,nSigmaPion1,nSigmaPion2,nSigmaPion3,nSigmaPion4,nSigmaPion5)
P <- c(P0,P1,P2,P3,P4,P5)
P <- as.numeric(unlist(P))
length(P)==dim(sim_x)[1]
rm(P0,P1,P2,P3,P4,P5)
sim_P <- P
rm(P)
Eta <- c(Eta0,Eta1,Eta2,Eta3,Eta4,Eta5)
Eta <- as.numeric(unlist(Eta))
length(Eta)==dim(sim_x)[1]
rm(Eta0,Eta1,Eta2,Eta3,Eta4,Eta5)
sim_Eta <- Eta
rm(Eta)
sim_nsig <- nsigmaPion
rm(nsigmaPion)
save(sim_x,file="c:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_x_full.rdata")
save(sim_nsig,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_nsig.rdata")
save(sim_P,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_P.rdata")
save(sim_Eta,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_Eta.rdata")
rm(list=ls())
real_files <- list.files(path="C:/Users/gerhard/Documents/msc-thesis-data/processed/000265343/", pattern="*.json", full.names=T, recursive=T)
require(jsonlite)
j <- list()
count <- 0
for(i in real_files){
# count=count+1
# print(count/length(real_files))
j <- c(j,fromJSON(i))
# print(length(j))
}
##
layer0 <- sapply(j, `[[`,"layer 0")
nSigmaPion0 <- sapply(j, `[[`,"nSigmaPion")
pdgCode0 <- sapply(j, `[[`,"pdgCode")
P0 <- sapply(j, `[[`,"P")
Eta0 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer0,typeof)=="list"),which(sapply(layer0, is.null))))
length(n)
layer0 <- layer0[-n]
nSigmaPion0 <- nSigmaPion0[-n]
pdgCode0 <- pdgCode0[-n]
P0 <- P0[-n]
Eta0 <- Eta0[-n]
# for(i in 1:length(layer0)){
# layer0[[i]][which(layer0[[i]]==-7169)] <- 0
# }
##
layer1 <- sapply(j, `[[`,"layer 1")
nSigmaPion1 <- sapply(j, `[[`,"nSigmaPion")
pdgCode1 <- sapply(j, `[[`,"pdgCode")
P1 <- sapply(j, `[[`,"P")
Eta1 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer1,typeof)=="list"),which(sapply(layer1, is.null))))
length(n)
layer1 <- layer1[-n]
nSigmaPion1 <- nSigmaPion1[-n]
pdgCode1 <- pdgCode1[-n]
P1 <- P1[-n]
Eta1 <- Eta1[-n]
# for(i in 1:length(layer1)){
# layer1[[i]][which(layer1[[i]]==-7169)] <- 0
# }
##
layer2 <- sapply(j, `[[`,"layer 2")
nSigmaPion2 <- sapply(j, `[[`,"nSigmaPion")
pdgCode2 <- sapply(j, `[[`,"pdgCode")
P2 <- sapply(j, `[[`,"P")
Eta2 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer2,typeof)=="list"),which(sapply(layer2, is.null))))
length(n)
layer2 <- layer2[-n]
pdgCode2 <- pdgCode2[-n]
nSigmaPion2 <- nSigmaPion2[-n]
P2 <- P2[-n]
Eta2 <- Eta2[-n]
# for(i in 1:length(layer2)){
# layer2[[i]][which(layer2[[i]]==-7169)] <- 0
# }
##
layer3 <- sapply(j, `[[`,"layer 3")
nSigmaPion3 <- sapply(j, `[[`,"nSigmaPion")
pdgCode3 <- sapply(j, `[[`,"pdgCode")
P3 <- sapply(j, `[[`,"P")
Eta3 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer3,typeof)=="list"),which(sapply(layer3, is.null))))
length(n)
layer3 <- layer3[-n]
pdgCode3 <- pdgCode3[-n]
nSigmaPion3 <- nSigmaPion3[-n]
P3 <- P3[-n]
Eta3 <- Eta3[-n]
# for(i in 1:length(layer3)){
# layer3[[i]][which(layer3[[i]]==-7169)] <- 0
# }
##
layer4 <- sapply(j, `[[`,"layer 4")
nSigmaPion4 <- sapply(j, `[[`,"nSigmaPion")
pdgCode4 <- sapply(j, `[[`,"pdgCode")
P4 <- sapply(j, `[[`,"P")
Eta4 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer4,typeof)=="list"),which(sapply(layer4, is.null))))
length(n)
layer4 <- layer4[-n]
pdgCode4 <- pdgCode4[-n]
nSigmaPion4 <- nSigmaPion4[-n]
P4 <- P4[-n]
Eta4 <- Eta4[-n]
# for(i in 1:length(layer4)){
# layer4[[i]][which(layer4[[i]]==-7169)] <- 0
# }
##
layer5 <- sapply(j, `[[`,"layer 5")
nSigmaPion5 <- sapply(j, `[[`,"nSigmaPion")
pdgCode5 <- sapply(j, `[[`,"pdgCode")
P5 <- sapply(j, `[[`,"P")
Eta5 <- sapply(j, `[[`,"Eta")
n <- unique(c(which(sapply(layer5,typeof)=="list"),which(sapply(layer5, is.null))))
length(n)
layer5 <- layer5[-n]
pdgCode5 <- pdgCode5[-n]
nSigmaPion5 <- nSigmaPion5[-n]
P5 <- P5[-n]
Eta5 <- Eta5[-n]
# for(i in 1:length(layer5)){
# layer5[[i]][which(layer5[[i]]==-7169)] <- 0
# }
##
rm(j)
real_x <- c(layer0,layer1,layer2,layer3,layer4,layer5)
rm(layer0,layer1,layer2,layer3,layer4,layer5)
for(i in 1:length(real_x)){
dim(real_x[[i]]) <- c(1,17,24)
# print(i/length(real_x))
}
require(abind)
real_x <- abind(real_x,along=1)
nsigmaPion <- c(nSigmaPion0,nSigmaPion1,nSigmaPion2,nSigmaPion3,nSigmaPion4,nSigmaPion5)
nsigmaPion <- as.numeric(unlist(nsigmaPion))
pdgCode <- c(pdgCode0,pdgCode1,pdgCode2,pdgCode3,pdgCode4,pdgCode5)
pdgCode <- as.numeric(unlist(pdgCode))
rm(nSigmaPion0,nSigmaPion1,nSigmaPion2,nSigmaPion3,nSigmaPion4,nSigmaPion5)
rm(pdgCode0,pdgCode1,pdgCode2,pdgCode3,pdgCode4,pdgCode5)
P <- c(P0,P1,P2,P3,P4,P5)
P <- as.numeric(unlist(P))
# length(P)==dim(real_x)[1]
rm(P0,P1,P2,P3,P4,P5)
real_P <- P
rm(P)
Eta <- c(Eta0,Eta1,Eta2,Eta3,Eta4,Eta5)
Eta <- as.numeric(unlist(Eta))
# length(Eta)==dim(sim_x)[1]
rm(Eta0,Eta1,Eta2,Eta3,Eta4,Eta5)
real_Eta <- Eta
rm(Eta)
pions <- which(abs(pdgCode)==211)
nsigmaPion <- nsigmaPion[pions]
real_x <- real_x[pions,,]
real_Eta <- real_Eta[pions]
real_P <- real_P[pions]
length(nsigmaPion)==dim(real_x)[1]
real_nsig <- nsigmaPion
rm(nsigmaPion)
save(real_P,file="c:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_P.rdata")
save(real_x,file="c:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_x_full.rdata")
save(real_nsig,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_nsig.rdata")
save(real_P,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_P.rdata")
save(real_Eta,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_Eta.rdata")
save(pdgCode,file="C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_pdg.rdata")
rm(list=ls())
load("c:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_x_full.rdata")
load("C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_nsig.rdata")
load("C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_P.rdata")
load("C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/sim_Eta.rdata")
load("c:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_P.rdata")
load("c:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_x_full.rdata")
load("C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_nsig.rdata")
load("C:/Users/gerhard/Documents/msc-thesis-data/hijing-sim/real_Eta.rdata")
dim(sim_x)
dim(real_x)
require(ggplot2)
Eta <- data.frame(rbind(cbind(real_Eta,"real"),cbind(sim_Eta,"sim")))
names(Eta) <- c("Eta","real_or_fake")
Eta$Eta <- as.numeric(as.character(Eta$Eta))
Eta$real_or_fake <- as.factor(Eta$real_or_fake)
ggplot(Eta,aes(Eta,fill=real_or_fake,colour=real_or_fake,alpha=0.2
))+geom_freqpoly(bins=1000)

require(ggplot2)
P <- data.frame(rbind(cbind(real_P,"real"),cbind(sim_P,"sim")))
names(P) <- c("P","real_or_fake")
P$P <- as.numeric(as.character(P$P))
P$real_or_fake <- as.factor(P$real_or_fake)
ggplot(P,aes(P,fill=real_or_fake,colour=real_or_fake,alpha=0.2))+geom_freqpoly(bins=1000)
P <- data.frame(P)
P1 <- P[P$P<=20,]
ggplot(P1,aes(P,fill=real_or_fake,colour=real_or_fake,alpha=0.2))+geom_freqpoly(bins=1000)


require(ggplot2)
nsig <- data.frame(rbind(cbind(real_nsig,"real"),cbind(sim_nsig,"sim")))
names(nsig) <- c("nsig","real_or_fake")
nsig$nsig <- as.numeric(as.character(nsig$nsig))
nsig$real_or_fake <- as.factor(nsig$real_or_fake)
ggplot(nsig,aes(nsig,fill=real_or_fake,colour=real_or_fake,alpha=0.2
))+geom_freqpoly(bins=1000)
nsig2 <- nsig[abs(nsig$nsig)<=3,,]
ggplot(nsig2,aes(nsig,fill=real_or_fake,colour=real_or_fake,alpha=0.2
))+geom_freqpoly(bins=1000)


require(abind)
x <- abind(real_x,sim_x,along=1)
y <- c(rep(1,dim(real_x)[1]),rep(0,dim(sim_x)[1]))
x <- x[abs(Eta$Eta)<=.9,,]
nsig <- nsig[(abs(Eta$Eta))<=.9,]
P <- P[(abs(Eta$Eta))<=.9,]
y <- y[(abs(Eta$Eta))<=.9]
x <- x[P$P<=20,,]
nsig <- nsig[P$P<=20,]
y <- y[P$P<=20]
x <- x[abs(nsig$nsig)<=3,,]
y <- y[abs(nsig$nsig)<=3]
rm(real_Eta,real_nsig,real_P,sim_Eta,sim_nsig,sim_P,real_x,sim_x,Eta,nsig,P,P1)
rm(real_x,sim_x)
require(keras)
dim(x) <- c(dim(x),1)
x <- (x-max(x))/max(x)
train_ind <- sample(1:length(y),size=round(0.75*length(y)))
x_train <- x[train_ind,,,]
y_train <- y[train_ind]
x_test <- x[-train_ind,,,]
y_test <- y[-train_ind]
dim(x_train) <- c(dim(x_train),1)
dim(x_test) <- c(dim(x_test),1)
model <- keras_model_sequential() %>%
layer_conv_2d(filters=1,kernel_size=c(17,24))%>%#,activation="relu") %>%
# layer_max_pooling_2d() %>%
# layer_dropout(rate=0.2) %>%
# layer_conv_2d(filters=32,kernel_size=c(3,3),activation="relu") %>%
# layer_max_pooling_2d() %>%
# layer_dropout(rate=0.2) %>%
layer_flatten() %>%
layer_dense(1,"softmax")
model %>%
compile(
loss="binary_crossentropy",
optimizer_sgd(0.000001),
metrics="acc"
)
history <- model %>%
fit(x_train,
y_train,
batch_size=32,
verbose=2,
epochs=20,
validation_data=list(x_test,y_test),
shuffle=TRUE)
png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real1.png")
plot(history)
dev.off()

rm(model)
model <- keras_model_sequential() %>%
layer_flatten() %>%
layer_dense(512,"relu") %>%
layer_dense(256,"relu") %>%
layer_dense(128,"relu") %>%
layer_dense(1,"sigmoid")
model %>%
compile(
loss="binary_crossentropy",
optimizer_sgd(0.000001),
metrics="acc"
)
history <- model %>%
fit(x_train,
y_train,
batch_size=32,
verbose=2,
epochs=20,
validation_data=list(x_test,y_test),
shuffle=TRUE)
png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real2.png")
plot(history)
dev.off()

rm(model)
model <- keras_model_sequential() %>%
layer_flatten() %>%
layer_dense(32,"relu") %>%
layer_dense(1,"sigmoid")
model %>%
compile(
loss="binary_crossentropy",
optimizer_sgd(0.01),
metrics="acc"
)
history <- model %>%
fit(x_train,
y_train,
batch_size=8,
verbose=2,
epochs=20,
validation_data=list(x_test,y_test),
shuffle=TRUE)
png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real3.png")
plot(history)
dev.off()

barplot(table(y_train))
# barplot(table(y_test))

real <- which(y==1)
fake <- which(y!=1)
i <- c(fake,sample(real,length(fake),replace=F))
x <- x[i,,,]
y <- y[i]
dim(x) <- c(dim(x),1)
barplot(table(y))
train_ind <- sample(1:length(y),size=round(0.75*length(y)))
x_train <- x[train_ind,,,]
y_train <- y[train_ind]
x_test <- x[-train_ind,,,]
y_test <- y[-train_ind]
final_test_ind <- sample(1:length(y_train),size=round(0.1*length(y_train)))
x_train <- x[-final_test_ind,,,]
y_train <- y[-final_test_ind]
x_final_test <- x[final_test_ind,,,]
y_final_test <- y[final_test_ind]
dim(x_train) <- c(dim(x_train),1)
dim(x_test) <- c(dim(x_test),1)
dim(x_final_test) <- c(dim(x_final_test),1)
rm(x,y)

model <- keras_model_sequential() %>%
layer_conv_2d(32, kernel_size = c(3,2)) %>%
layer_max_pooling_2d() %>%
layer_flatten() %>%
layer_dense(128,"tanh") %>%
layer_dense(1, "sigmoid")
model %>%
compile(
loss="binary_crossentropy",
optimizer_adam(0.001),
metrics="acc"
)
history <- model %>%
fit(x_train,
y_train,
batch_size=8,
verbose=2,
epochs=20,
validation_data=list(x_test,y_test),
shuffle=TRUE)
png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real4.png")
plot(history)
dev.off()

model <- keras_model_sequential() %>%
layer_conv_2d(16, kernel_size = c(2,3)) %>%
layer_max_pooling_2d() %>%
layer_conv_2d(32, kernel_size = c(3,3)) %>%
layer_max_pooling_2d() %>%
layer_flatten() %>%
layer_dense(128,"tanh") %>%
layer_dense(1, "sigmoid")
model %>%
compile(
loss="binary_crossentropy",
optimizer_adam(0.001),
metrics="acc"
)
history <- model %>%
fit(x_train,
y_train,
batch_size=8,
verbose=2,
epochs=20,
validation_data=list(x_test,y_test),
shuffle=TRUE)
png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real5.png")
plot(history)
dev.off()

model <- keras_model_sequential() %>%
layer_conv_2d(16, kernel_size = c(3,3),input_shape=c(17,24,1)) %>%
layer_max_pooling_2d() %>%
layer_conv_2d(32, kernel_size = c(3,3)) %>%
layer_max_pooling_2d() %>%
layer_flatten() %>%
layer_dense(256,"tanh") %>%
layer_dense(128,"tanh") %>%
layer_dense(1, "sigmoid")
model %>%
compile(
loss="binary_crossentropy",
optimizer_adam(0.001),
metrics="acc"
)
history <- model %>%
fit(x_train,
y_train,
batch_size=8,
verbose=2,
epochs=20,
validation_data=list(x_test,y_test),
shuffle=TRUE)
png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real6.png")
plot(history,"C:/Users/gerhard/documents/MSc-thesis/NEW/ML/model6.h5")
dev.off()

model <- keras_model_sequential() %>%
layer_conv_2d(16, kernel_size = c(2,3),activation = "tanh") %>%
layer_max_pooling_2d() %>%
layer_conv_2d(32, kernel_size = c(3,3),activation = "tanh") %>%
layer_max_pooling_2d() %>%
layer_flatten() %>%
layer_dense(128,"tanh") %>%
layer_dropout(rate=0.1) %>%
layer_dense(128,"tanh") %>%
layer_dense(1, "sigmoid")
model %>%
compile(
loss="binary_crossentropy",
optimizer_adam(0.001),
metrics="acc"
)
history <- model %>%
fit(x_train,
y_train,
batch_size=8,
verbose=2,
epochs=20,
validation_data=list(x_test,y_test),
shuffle=TRUE)
png("C:/Users/gerhard/documents/MSc-thesis/NEW/ML/geant_v_real7.png")
plot(history)
dev.off()

preds <- model %>%
predict(x_final_test)
pred_act <- data.frame(cbind(preds,y_final_test))
pred_act$pred <- ifelse(pred_act$V1>=0.5,1,0)
require(caret)
require(pander)
caret::confusionMatrix(data=factor(pred_act$pred),reference = factor(pred_act$y_final_test))
