rm(list=ls())
load("c:/Users/gerhard/documents/msc-thesis-data/all_data.rdata")

With dEdX, without P or PT

rm(meta)
names(singal)

singal <- singal[,-c(44,45)]

singal[,1:44] <- scale(singal[,1:44]) 


singal <- as.matrix(singal)

ys <- as.matrix(ys)

test_ind <- sample(1:nrow(singal),size=round(0.15*nrow(singal)),replace = F)

singal_test <- singal[test_ind,]

singal <- singal[-test_ind,]

ys_test <- ys[test_ind,]

ys <- ys[-test_ind,]

require(keras)

ys <- ys[,1]

ys_test <- ys_test[,1]
model <- keras_model_sequential() %>%
  layer_dense(256,"tanh") %>%
  layer_dense(128,"tanh") %>%
  layer_dense(1,"sigmoid")

model %>% compile(
  optimizer = 'adam',
  loss = "binary_crossentropy"
)

history <- model %>% fit(
  x = singal,
  y = ys,
  epochs = 50,
  batch_size = 32,
  verbose=2,
  validation_split=0.15,
  metrics=c('accuracy'),
  shuffle=T
)

png(filename = "C:/Users/gerhard/documents/Msc-thesis/NEW/ML/m3.png")
plot(history)
dev.off()

pred <- model %>%
  predict(singal_test)

pred_act <- cbind(pred,ys_test)
pred_act <- data.frame(pred_act)

names(pred_act) <- c("pred","act")

pi <- which(pred_act$act==1)
el <- which(pred_act$act!=1)

while(length(pi)%%6!=0){
  #print(length(pi))
  pi <- pi[-length(pi)]
  
}

while(length(el)%%6!=0){
  #print(length(el))
  el <- el[-length(el)]
  
}

pred_act <- pred_act[c(el,pi),]

six_tracklet_pred <- c()

for(i in seq(1,nrow(pred_act),6)){
  j=i+5
  
  this.dat <- prod(pred_act$pred[i:j])/sum(prod(pred_act$pred[i:j]),prod(1-pred_act$pred[i:j]))
  
  six_tracklet_pred <- c(six_tracklet_pred,this.dat)
}

six_tracklet_pred <- data.frame(six_tracklet_pred)

six_tracklet_pred <- na.omit(six_tracklet_pred)

six_tracklet_real <- c()

for(i in seq(1,nrow(pred_act),6)){
  this.dat <- pred_act$act[i]
  six_tracklet_real <- c(six_tracklet_real,this.dat)
  
}

six_tracklet_real <- data.frame(six_tracklet_real)

which(is.na(six_tracklet_real))

pred_act <- data.frame(cbind(six_tracklet_pred,six_tracklet_real))

elec_pi_eff_func <- function(model_1.preds,model_1.labels){
  
    # model_1.preds <- read.csv(model_1.preds,header=F, sep="")
    # 
    # model_1.labels <- read.csv(model_1.labels,header=F, sep="")
    
    model_1 <- data.frame(cbind(model_1.preds,model_1.labels))
    
    model_1.electrons <- which(model_1[,2]==1)
    
    electrons <- model_1[model_1.electrons,]
    
    pions <- model_1[-as.numeric(model_1.electrons),]
    
    electrons <- data.frame(electrons)
    
    names(electrons) <- c("prediction","label")
    
    pions <- data.frame(pions)
    
    names(pions) <- c("prediction","label")
    
    electron_efficiency <- function(electrons.,par){
  
    electrons <- electrons.
    
    electrons$electron_pred <- ifelse(electrons$prediction>=par[1],1,0)
    
    correct <- ifelse(electrons$electron_pred==electrons$label,1,0)
    
    error_metric <- sum(correct)/nrow(electrons)
    
    error_metric <- (error_metric-0.9)^2
    
    return(error_metric)
  
}

  res <- optim(par=c(0),fn=electron_efficiency,lower = 0,upper = 1,electrons.=electrons,method="Brent")
  
  require(ggplot2)
  
  g <- ggplot(pred_act,aes(pred_act[,1],colour=factor(pred_act[,2])))+geom_histogram(bins = 1000)+facet_wrap(factor(pred_act[,2]))
  print(g)
  
  hist(pred_act[,1],breaks=1000)
  abline(v=res$par,col="red")
  
  electrons$predicted_label <- ifelse(electrons$prediction>=res$par,1,0)
  
  print(paste0("Electron Efficiency: ",sum(electrons$predicted_label)/nrow(electrons)))
  
  pions$predicted_label <- ifelse(pions$prediction>=res$par,1,0)
  
  pions$misclassified_as_electron <- ifelse(pions$predicted_label==1,1,0)
  
  print(paste0("Pion Efficiency: ",(sum(pions$misclassified_as_electron)/nrow(pions))^6))
  
  pred_act$final_pred <- ifelse(pred_act[,1]>=res$par,1,0)
  
  require(caret)
  
  print(caret::confusionMatrix(data=factor(pred_act$final_pred),reference = factor(pred_act[,2])))
  print("--------------------------------------------------------------------------------------------------")
  
}

elec_pi_eff_func(pred_act[,1],pred_act[,2])

Without dEdX

rm(list=ls())
load("c:/Users/gerhard/documents/msc-thesis-data/all_data.rdata")
rm(meta)
names(singal)

require(ggplot2)

ggplot(singal,aes(x=`meta$P`,col=factor(ys$X1)))+geom_histogram(bins = 100)+facet_wrap(as.factor(ys$X1))

singal <- singal[,-c(42,44,45)]

singal[,1:41] <- scale(singal[,1:41]) 


singal <- as.matrix(singal)

ys <- as.matrix(ys)

test_ind <- sample(1:nrow(singal),size=round(0.15*nrow(singal)),replace = F)

singal_test <- singal[test_ind,]

singal <- singal[-test_ind,]

ys_test <- ys[test_ind,]

ys <- ys[-test_ind,]

require(keras)

ys <- ys[,1]

ys_test <- ys_test[,1]

model <- keras_model_sequential() %>%
  layer_dense(256,"tanh") %>%
  layer_dense(128,"tanh") %>%
  layer_dense(1,"sigmoid")

model %>% compile(
  optimizer = 'adam',
  loss = "binary_crossentropy"
)

history <- model %>% fit(
  x = singal,
  y = ys,
  epochs = 50,
  batch_size = 32,
  verbose=2,
  validation_split=0.15,
  metrics="acc",
  shuffle=T
)

png(filename = "C:/Users/gerhard/documents/Msc-thesis/NEW/ML/m7.png")
plot(history)
dev.off()

pred <- model %>%
  predict(singal_test)

pred_act <- cbind(pred,ys_test)
pred_act <- data.frame(pred_act)

names(pred_act) <- c("pred","act")

pi <- which(pred_act$act==1)
el <- which(pred_act$act!=1)

while(length(pi)%%6!=0){
  #print(length(pi))
  pi <- pi[-length(pi)]
  
}

while(length(el)%%6!=0){
  #print(length(el))
  el <- el[-length(el)]
  
}

pred_act <- pred_act[c(el,pi),]

six_tracklet_pred <- c()

for(i in seq(1,nrow(pred_act),6)){
  j=i+5
  
  this.dat <- prod(pred_act$pred[i:j])/sum(prod(pred_act$pred[i:j]),prod(1-pred_act$pred[i:j]))
  
  six_tracklet_pred <- c(six_tracklet_pred,this.dat)
}

six_tracklet_pred <- data.frame(six_tracklet_pred)

six_tracklet_pred <- na.omit(six_tracklet_pred)

six_tracklet_real <- c()

for(i in seq(1,nrow(pred_act),6)){
  this.dat <- pred_act$act[i]
  six_tracklet_real <- c(six_tracklet_real,this.dat)
  
}

six_tracklet_real <- data.frame(six_tracklet_real)

which(is.na(six_tracklet_real))

pred_act <- data.frame(cbind(six_tracklet_pred,six_tracklet_real))

elec_pi_eff_func <- function(model_1.preds,model_1.labels){
  
    # model_1.preds <- read.csv(model_1.preds,header=F, sep="")
    # 
    # model_1.labels <- read.csv(model_1.labels,header=F, sep="")
    
    model_1 <- data.frame(cbind(model_1.preds,model_1.labels))
    
    model_1.electrons <- which(model_1[,2]==1)
    
    electrons <- model_1[model_1.electrons,]
    
    pions <- model_1[-as.numeric(model_1.electrons),]
    
    electrons <- data.frame(electrons)
    
    names(electrons) <- c("prediction","label")
    
    pions <- data.frame(pions)
    
    names(pions) <- c("prediction","label")
    
    electron_efficiency <- function(electrons.,par){
  
    electrons <- electrons.
    
    electrons$electron_pred <- ifelse(electrons$prediction>=par[1],1,0)
    
    correct <- ifelse(electrons$electron_pred==electrons$label,1,0)
    
    error_metric <- sum(correct)/nrow(electrons)
    
    error_metric <- (error_metric-0.9)^2
    
    return(error_metric)
  
}

  res <- optim(par=c(0),fn=electron_efficiency,lower = 0,upper = 1,electrons.=electrons,method="Brent")
  
  require(ggplot2)
  
  g <- ggplot(pred_act,aes(pred_act[,1],colour=factor(pred_act[,2])))+geom_histogram(bins = 1000)+facet_wrap(factor(pred_act[,2]))
  print(g)
  
  hist(pred_act[,1],breaks=1000)
  abline(v=res$par,col="red")
  
  electrons$predicted_label <- ifelse(electrons$prediction>=res$par,1,0)
  
  print(paste0("Electron Efficiency: ",sum(electrons$predicted_label)/nrow(electrons)))
  
  pions$predicted_label <- ifelse(pions$prediction>=res$par,1,0)
  
  pions$misclassified_as_electron <- ifelse(pions$predicted_label==1,1,0)
  
  print(paste0("Pion Efficiency: ",(sum(pions$misclassified_as_electron)/nrow(pions))^6))
  
  pred_act$final_pred <- ifelse(pred_act[,1]>=res$par,1,0)
  
  require(caret)
  
  print(caret::confusionMatrix(data=factor(pred_act$final_pred),reference = factor(pred_act[,2])))
  print("--------------------------------------------------------------------------------------------------")
  
}

elec_pi_eff_func(pred_act[,1],pred_act[,2])

Without Eta, Theta or Phi, i.e. just rowsums and columnsums

singal <- singal[,1:41]
singal_test <- singal_test[,1:41]
model <- keras_model_sequential() %>%
  layer_dense(256,"tanh") %>%
  layer_dense(128,"tanh") %>%
  layer_dense(1,"sigmoid")

model %>% compile(
  optimizer = 'adam',
  loss = "binary_crossentropy"
)

history <- model %>% fit(
  x = singal,
  y = ys,
  epochs = 50,
  batch_size = 32,
  verbose=2,
  validation_split=0.15,
  metrics="acc",
  shuffle=T
)

png(filename = "C:/Users/gerhard/documents/Msc-thesis/NEW/ML/m11.png")
plot(history)
dev.off()

pred <- model %>%
  predict(singal_test)

pred_act <- cbind(pred,ys_test)
pred_act <- data.frame(pred_act)

names(pred_act) <- c("pred","act")

pi <- which(pred_act$act==1)
el <- which(pred_act$act!=1)

while(length(pi)%%6!=0){
  #print(length(pi))
  pi <- pi[-length(pi)]
  
}

while(length(el)%%6!=0){
  #print(length(el))
  el <- el[-length(el)]
  
}

pred_act <- pred_act[c(el,pi),]

six_tracklet_pred <- c()

for(i in seq(1,nrow(pred_act),6)){
  j=i+5
  
  this.dat <- prod(pred_act$pred[i:j])/sum(prod(pred_act$pred[i:j]),prod(1-pred_act$pred[i:j]))
  
  six_tracklet_pred <- c(six_tracklet_pred,this.dat)
}

six_tracklet_pred <- data.frame(six_tracklet_pred)

six_tracklet_pred <- na.omit(six_tracklet_pred)

six_tracklet_real <- c()

for(i in seq(1,nrow(pred_act),6)){
  this.dat <- pred_act$act[i]
  six_tracklet_real <- c(six_tracklet_real,this.dat)
  
}

six_tracklet_real <- data.frame(six_tracklet_real)

which(is.na(six_tracklet_real))

pred_act <- data.frame(cbind(six_tracklet_pred,six_tracklet_real))

elec_pi_eff_func <- function(model_1.preds,model_1.labels){
  
    # model_1.preds <- read.csv(model_1.preds,header=F, sep="")
    # 
    # model_1.labels <- read.csv(model_1.labels,header=F, sep="")
    
    model_1 <- data.frame(cbind(model_1.preds,model_1.labels))
    
    model_1.electrons <- which(model_1[,2]==1)
    
    electrons <- model_1[model_1.electrons,]
    
    pions <- model_1[-as.numeric(model_1.electrons),]
    
    electrons <- data.frame(electrons)
    
    names(electrons) <- c("prediction","label")
    
    pions <- data.frame(pions)
    
    names(pions) <- c("prediction","label")
    
    electron_efficiency <- function(electrons.,par){
  
    electrons <- electrons.
    
    electrons$electron_pred <- ifelse(electrons$prediction>=par[1],1,0)
    
    correct <- ifelse(electrons$electron_pred==electrons$label,1,0)
    
    error_metric <- sum(correct)/nrow(electrons)
    
    error_metric <- (error_metric-0.9)^2
    
    return(error_metric)
  
}

  res <- optim(par=c(0),fn=electron_efficiency,lower = 0,upper = 1,electrons.=electrons,method="Brent")
  
  require(ggplot2)
  
  g <- ggplot(pred_act,aes(pred_act[,1],colour=factor(pred_act[,2])))+geom_histogram(bins = 1000)+facet_wrap(factor(pred_act[,2]))
  print(g)
  
  hist(pred_act[,1],breaks=1000)
  abline(v=res$par,col="red")
  
  electrons$predicted_label <- ifelse(electrons$prediction>=res$par,1,0)
  
  print(paste0("Electron Efficiency: ",sum(electrons$predicted_label)/nrow(electrons)))
  
  pions$predicted_label <- ifelse(pions$prediction>=res$par,1,0)
  
  pions$misclassified_as_electron <- ifelse(pions$predicted_label==1,1,0)
  
  print(paste0("Pion Efficiency: ",(sum(pions$misclassified_as_electron)/nrow(pions))^6))
  
  pred_act$final_pred <- ifelse(pred_act[,1]>=res$par,1,0)
  
  require(caret)
  
  print(caret::confusionMatrix(data=factor(pred_act$final_pred),reference = factor(pred_act[,2])))
  print("--------------------------------------------------------------------------------------------------")
  
}

elec_pi_eff_func(pred_act[,1],pred_act[,2])