setwd("C:/Users/gerha/Documents")
Using the provided formulae for:
Momentum Resolution Function
Detector Efficiency Function
We reconstruct the plots given, as follows:
#import data and rename variables
dat <- read.csv("phydat.csv")
names(dat) <- c("PT.min","PT.max","mom.res","det.ef")
#define functions to firstly calculate the momentum resolution of the detector, and subsequently use this estimate to calculate a distributed estimator for the transverse momentum spectrum; test the functions using a true PT of 3 GeV (as in the example given), to make sure the functions work as intended
mom.res.func <- function(PT){
resolution <- sqrt((0.08)^2+(0.03*PT)^2)
return(resolution)
}
resolution3 <- mom.res.func(3)
dist.est.PT <- function(resolution,PT){
distributed.estimator <- c(PT+(PT*resolution),PT-(PT*resolution))
return(distributed.estimator)
}
#dist.est.PT(resolution3,3)
#define a function to calculate the efficiency of the detector (defined as the probability that the detector will produce a signal strong enough to be recognized as a particle, at the given true PT)
efficiency <- function(PT){
eff <- 0.95-(1/(4+(PT)))
return(eff)
}
PT.range <- seq(1,10,0.001)
par(mfrow=c(1,3))
dat1 <- dat[1:4,]
plot(x=(dat1$PT.max+dat1$PT.min)/2,y=dat1$mom.res,ylim=c(0,1.2),xlim=c(1,2.5),cex=0.3,main="",xlab="PT(GeV)",ylab="dN/dP_T (GeV^-1)")
library(Hmisc)
minor.tick(ny=10, tick.ratio=0.5)
segments(x0=dat1$PT.min,x1=dat1$PT.max,y0=dat1$mom.res,y1=dat1$mom.res,col="red")
segments(x0=(dat1$PT.max+dat1$PT.min)/2,x1=(dat1$PT.max+dat1$PT.min)/2,y0=dat1$mom.res-dat1$det.ef,y1=dat1$mom.res+dat1$det.ef,col="red")
#plot again to show differences between smaller values
dat2 <- dat[4:9,]
plot(x=(dat2$PT.max+dat2$PT.min)/2,y=dat2$mom.res,ylim=c(0,0.1),xlim=c(2,6),cex=0.3,main="",xlab="PT(GeV)",ylab="dN/dP_T (GeV^-1)")
library(Hmisc)
minor.tick(ny=10, tick.ratio=0.5)
segments(x0=dat2$PT.min,x1=dat2$PT.max,y0=dat2$mom.res,y1=dat2$mom.res,col="red")
segments(x0=(dat2$PT.max+dat2$PT.min)/2,x1=(dat2$PT.max+dat2$PT.min)/2,y0=dat2$mom.res-dat2$det.ef,y1=dat2$mom.res+dat2$det.ef,col="red")
#and again, for even smaller values
dat3 <- dat[9:12,]
plot(x=(dat3$PT.max+dat3$PT.min)/2,y=dat3$mom.res,ylim=c(0,0.003),xlim=c(5,10),cex=0.3,main="",xlab="PT(GeV)",ylab="dN/dP_T (GeV^-1)")
library(Hmisc)
minor.tick(ny=10, tick.ratio=0.5)
segments(x0=dat3$PT.min,x1=dat3$PT.max,y0=dat3$mom.res,y1=dat3$mom.res,col="red")
segments(x0=(dat3$PT.max+dat3$PT.min)/2,x1=(dat3$PT.max+dat3$PT.min)/2,y0=dat3$mom.res-dat3$det.ef,y1=dat3$mom.res+dat3$det.ef,col="red")
PT.res <- mom.res.func(PT.range)
PT.res <- as.data.frame(cbind(PT.res,PT.range))
par(mfrow=c(1,1))
plot(y=PT.res$PT.res,x=PT.res$PT.range,ylim=c(0,0.4),main= "Transverse momentum resolution", ylab = "Resolution Ï(PT)/PT",xlab="PT(GeV)")
det.ef <- efficiency(PT.range)
det.ef <- as.data.frame(cbind(det.ef,PT.range))
plot(y=det.ef$det.ef,x=det.ef$PT.range,ylim=c(0,1),main= "Detector Efficiency", ylab = "Efficiency (epsilon)",xlab="PT(GeV)")
#we are tasked with looking for the detector response matrix using this information (the probabilities of finding a reconstructed momentum in any of the bins of the histogram for a true, or Monte Carlo, momentum)
#this should be done using the correction factors method, to correct the reconstructed spectrum shown above for efficiency and resolution
#required is the detector response matrix, and the corrected spectrum, superimposed on the measured spectrum
true.momentum <- PT.range
# normally distributed estimator for the range of transverse momenta:
momentum.resolution <- mom.res.func(true.momentum)
#distributed estimator for the transverse momentum spectrum
distributed.estimator <- c()
for(i in seq(1,length(true.momentum))){
x <- cbind(dist.est.PT(momentum.resolution[i],true.momentum[i])[1],dist.est.PT(momentum.resolution[i],true.momentum[i])[2])
distributed.estimator <- rbind(distributed.estimator,x)
}
#we make a plot to visualize the distributed estimator vs the true momentum
plot(x=true.momentum,y=distributed.estimator[1:9001],col="white",cex=0.5, main="Normally distributed estimator of transverse momentum\n (blue & red)\nvs. True momentum(black line)",xlab = "True transverse momentum (GeV)",ylab="Distributed estimator for momentum spectrum",sub="*NOTE: Bin widths are indicated with dashed lines*")
segments(x0=true.momentum,y0=distributed.estimator[1:9001],x1=true.momentum,y1=distributed.estimator[9002:18002],col="red")
points(x=true.momentum,y=distributed.estimator[9002:18002],col="blue",cex=0.5)
points(x=true.momentum,y=distributed.estimator[1:9001],col="blue",cex=0.5)
points(x=true.momentum,y=true.momentum,col="black",cex=0.1)
bin1 <- dat$PT.min
bin2 <- dat$PT.max-0.01
abline(v=bin1,lty="dashed")
abline(v=bin2,lty="dashed")
#we create a normally distributed sample (n=20) with mean = true PT and standard deviation=the detector's momentum resolution, for the range of true momenta from 0 to 10
nd <- c()
for(i in seq(1:length(true.momentum))){
normal.dist <- rnorm(20,mean=true.momentum[i],sd=momentum.resolution[i])
nd <- rbind(nd,normal.dist)
}
nd <- cbind(true.momentum,nd)
#prob <- efficiency(true.momentum)
eff <- nd
require(tidyverse)
eff <- as.tibble(eff)
#we now have a sample of normally distributed estimators for a range of true transverse momenta
#we filter data down to entries belonging to each bin:
bins <- c()
for(i in seq(1:12)){
bin <- seq(from=bin1[i],to=bin2[i],by=0.01)
bins <- rbind(bins,bin)
}
names(eff) <- c("PT",seq(1:20))
per.bin <- function(bin){
this.bin <- eff %>%
filter(true.momentum %in% bin)
return(this.bin)
}
bin.1 <- per.bin(bins[1,])
bin.1.1 <- unlist(bin.1[,seq(2:20)])
#plot the normally distributed estimator for the true momentum for the first bin:
hist(bin.1.1,main="Histogram of normally distributed estimators\nfor Bin 1 (Range of true PT: [1.00,1.30])",xlab="GeV",sub="The range of true transverse momenta is indicated in red")
bin1.2 <- bin.1[,c(1,2)]
segments(x0=1,x1=1.3,y0=20,y1=20,col="red",lwd=5)
#We are trying to find the probability that a reconstructed momentum will appear in any of the bins of a histogram of the true momentum values
true.vals <- seq(1:10)
prob.func <- function(i){
sd. <- mom.res.func(i)
norm.dist.est <- rnorm(1000000,mean=i,sd=sd.)
eff <- efficiency(i)
rand.num <- runif(1000000)
prob.est <- 0
for(j in rand.num){
if(j<=eff){
prob.est <- prob.est+1
}
}
prob.est <- prob.est/1000000
return(prob.est)
}
# for(i in true.vals){
# print(paste0("The proportion of 1000000 tracks with true PT of ", as.character(i), "\n that reconstruction should identify as a track,\n making the stated assumptions about resolution and efficiency\n, is: ", as.character(prob.func(i))))
# }
#in order to calculate probabilities for falling into specific bins, we first define the bins:
bin1.range <- c(1,1.3)
bin2.range <- c(1.3,1.6)
bin3.range <- c(1.6,2.1)
bin4.range <- c(2.1,2.6)
bin5.range <- c(2.6,3.1)
bin6.range <- c(3.1,3.7)
bin7.range <- c(3.7,4.4)
bin8.range <- c(4.4,5.2)
bin9.range <- c(5.2,6.0)
bin10.range <- c(6.0,7.2)
bin11.range <- c(7.2,8.4)
bin12.range <- c(8.4,10)
binners <- rbind(bin1.range,bin2.range,bin3.range,bin4.range,bin5.range,bin6.range,bin7.range,bin8.range,bin9.range,bin10.range,bin11.range,bin12.range)
binners <- as.data.frame(binners)
names(binners) <- c("min", "max")
bin.prob <- function(x,bin=binners){
sd. <- mom.res.func(x)
norm.dist.est <- rnorm(1000000,mean=x,sd=sd.)
eff <- efficiency(x)
rand.num <- runif(1000000)
prob.in.bin <- c()
for(i in seq(1:12)){
count <- 0
minimum <- bin[i,1]
maximum <- bin[i,2]
for(j in seq(1:length(rand.num))){
if(rand.num[j]<=eff){
if(norm.dist.est[j]>=minimum){
if(norm.dist.est[j]<maximum){
count <- count+1
}
}
}
}
add.me <- c(x,i,minimum,maximum,count/1000000)
prob.in.bin <- rbind(prob.in.bin,add.me)
}
return(prob.in.bin)
}
par(mfrow=c(1,1))
for(true in seq(1,10,0.5)){
true.string <- paste0("True PT: ",as.character(true)," GeV")
aa <- bin.prob(true)
plot(x=((aa[,4]+aa[,3])/2),y=aa[,5],type="S",main=true.string,xlim = c(-0.5,11),ylim=c(0,1),col="purple",lwd=2,ylab="Proportion",xlab="",xaxt="n")
barplot(height=c((1-(sum(aa[,5]))),(1-(1-(sum(aa[,5]))))),beside=T,col=c("orange","purple"),add=TRUE,xlim=c(-0.5,0.5),space=0,width=0.5)
abline(v=c(1,binners$max),lty="dashed",lwd=0.8,col="blue")
abline(v=true,col="red",lwd=2)
text(true,x=true,y=0.2,cex=2,col="red")
text(c("1.0-1.3 GeV","1.3-1.6","1.6-2.1","2.1-2.6","2.6-3.1","3.1-3.7","3.7-4.4","4.4-5.2","5.2-6.0","6.0-7.2","7.2-8.4","8.4-10.0 GeV"),x=((binners$max+binners$min)/2),y=0.02+(0.1*binners$min),col="blue",cex=0.6)
}
We figure out the proportion of values found in each of the bins of the reconstructed spectrum
We draw a sample of n=1’000’000 from an exponential distribution, weighted by the probability of being in each bin, according to the given reconstructed spectrum
We work out the probability of each sampled observation to appear in each of the twelve bins, and sum over these to get a proportion of expected values that will end up in each bin
#putting it all together:
#we create an empty response matrix
response.matrix <- matrix(nrow=12,ncol=12,data=rep(0,144))
#the response matrix is given by R_ij=P(observed in bin i|true value in bin j)
dimnames(response.matrix) <- list(c(seq(1:12)),c(seq(1:12)))
#we figure out the proportion of values in the reconstructed bins
proportion.factor <- dat[,3]/sum(dat[,3])
#we multiply this by 10'000, so that we generate entries in each bin:
proportion.factor2 <- as.integer(proportion.factor*10000)
#sum(proportion.factor2)
silly.sample <- sample(x=c(1.3,1.6,2.1,2.6,3.1,3.7,4.4,5.2,6.0,7.2,8.4,10),size=10000,replace = T,prob = proportion.factor)
plot(y=proportion.factor,x=binners$max,type="b",col="red",main="Probabilities for any observation being in a (true) bin range",xlab="PT (GeV",ylab="Proportion in bin")
#h. <- hist(silly.sample*100,plot=F)
# probs <- rep(0,12)
#
# for(i in seq(1:length(silly.sample))){
#
# x <- silly.sample[i]
# prob <- bin.prob(x)
# probs <- probs+prob[,5]
# }
bin.prob2 <- function(x,bin=binners){
sd. <- mom.res.func(x)
norm.dist.est <- rnorm(10,mean=x,sd=sd.)
eff <- efficiency(x)
rand.num <- runif(10)
prob.in.bin <- c()
for(i in seq(1:12)){
count <- 0
minimum <- bin[i,1]
maximum <- bin[i,2]
for(j in seq(1:length(rand.num))){
if(rand.num[j]<=eff){
if(norm.dist.est[j]>=minimum){
if(norm.dist.est[j]<maximum){
count <- count+1
}
}
}
}
add.me <- c(x,i,minimum,maximum,count/length(x))
prob.in.bin <- rbind(prob.in.bin,add.me)
}
return(prob.in.bin)
}
aaa <- rep(0,12)
bbb <- c()
for(i in silly.sample){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
plot(x=((binners$max+binners$min)/2),y=aaa/sum(aaa),type="s",col="blue",xlim=c(0,11),ylim=c(0,0.6),xaxt='n',main="Corrected spectrum (BLUE),\n superimposed on measured spectrum(RED)",xlab="Max values of bins (GeV)",ylab = "Proportion")
axis(side=1, at=(binners$max+binners$min)/2, labels=as.character(binners$max))
segments(x0=((binners$max+binners$min)/2),y0=0,y1=proportion.factor,col="red",lwd=5)
points(x=((binners$max+binners$min)/2),y=aaa/sum(aaa),pch=10,col="blue")
resp <- as.matrix(bbb)
colnames(resp) <- as.character(silly.sample)
rownames(resp) <- c("1.0-1.3 GeV","1.3-1.6","1.6-2.1","2.1-2.6","2.6-3.1","3.1-3.7","3.7-4.4","4.4-5.2","5.2-6.0","6.0-7.2","7.2-8.4","8.4-10.0 GeV")
resp <- t(resp)
columm <- colSums(resp)
collz <- as.numeric(columm)
#collz <- scale(collz,center=F)
collz <- diag(collz)
rownames(collz) <- c("1.0-1.3 GeV","1.3-1.6","1.6-2.1","2.1-2.6","2.6-3.1","3.1-3.7","3.7-4.4","4.4-5.2","5.2-6.0","6.0-7.2","7.2-8.4","8.4-10.0 GeV")
colnames(collz) <- c("1.0-1.3 GeV","1.3-1.6","1.6-2.1","2.1-2.6","2.6-3.1","3.1-3.7","3.7-4.4","4.4-5.2","5.2-6.0","6.0-7.2","7.2-8.4","8.4-10.0 GeV")
#require(knitr)
#kable(collz, caption="Normalised Response Matrix",row.names = T)
Response Matrix
original.spectrum <- scale(dat[,3],center=F)
#if we assume there were 10,000 tracks, they would be as follows:
original.spectrum <- as.integer(original.spectrum*10000)
#we recreate the dataset by repeating the value at bin centre for the specified amount of instances we expect:
observations <- (dat[,1]+dat[,2])/2
original.actual.data <- rep(observations,original.spectrum)
aaa <- rep(0,12)
bbb <- c()
for(i in original.actual.data){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
reconstructed.spectrum <- scale(aaa/10000,center=F)
#because both spectra were scaled, we multiply by the scale factor for the original spectrum
original.spectrum <- (original.spectrum*0.3799983)/10000 #we multiplied by 10000 to generate entries
reconstructed.spectrum <- reconstructed.spectrum*0.3799983
spectra <- cbind(original.spectrum,reconstructed.spectrum,dat[,2])
spectra <- as.data.frame(spectra)
names(spectra) <- c("y1","y2","x")
ys <- spectra[,1:2]
require(ggplot2)
plot.data <- c(ys$y1,ys$y2)
plot.data <- as.vector(as.numeric(plot.data))
labels <- c(rep("original",12),rep("new",12))
plot.data <- cbind(plot.data,labels)
x <- c((dat[,1]+dat[,2])/2,(dat[,1]+dat[,2])/2)
plot.data <- cbind(x,plot.data)
plot.data <- as.tibble(plot.data)
names(plot.data) <- c("x","y","label")
ggplot(data = plot.data,aes(x,y,fill=label))+geom_col(position = "dodge")+ggtitle("Original spectrum (PURPLE) vs. Corrected Spectrum (ORANGE)")+scale_fill_manual(values=c("orange","purple"))
save.image("Q1.RData")
response.matrix <- as.matrix(rep(0,144))
dim(response.matrix) <-c(12,12)
bin1 <- seq(bin1.range[1],bin1.range[2],length.out = 10)
bin2 <- seq(bin2.range[1],bin2.range[2],length.out = 10)
bin3 <- seq(bin3.range[1],bin3.range[2],length.out = 10)
bin4 <- seq(bin4.range[1],bin4.range[2],length.out = 10)
bin5 <- seq(bin5.range[1],bin5.range[2],length.out = 10)
bin6 <- seq(bin6.range[1],bin6.range[2],length.out = 10)
bin7 <- seq(bin7.range[1],bin7.range[2],length.out = 10)
bin8 <- seq(bin8.range[1],bin8.range[2],length.out = 10)
bin9 <- seq(bin9.range[1],bin9.range[2],length.out = 10)
bin10 <- seq(bin10.range[1],bin10.range[2],length.out = 10)
bin11 <- seq(bin11.range[1],bin11.range[2],length.out = 10)
bin12 <- seq(bin12.range[1],bin12.range[2],length.out = 10)
aaa <- rep(0,12)
bbb <- c()
for(i in bin1){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
diag(response.matrix) <- diag(response.matrix)+rowSums(bbb)
aaa <- rep(0,12)
bbb <- c()
for(i in bin2){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
diag(response.matrix) <- diag(response.matrix)+rowSums(bbb)
aaa <- rep(0,12)
bbb <- c()
for(i in bin3){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
diag(response.matrix) <- diag(response.matrix)+rowSums(bbb)
aaa <- rep(0,12)
bbb <- c()
for(i in bin4){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
diag(response.matrix) <- diag(response.matrix)+rowSums(bbb)
aaa <- rep(0,12)
bbb <- c()
for(i in bin5){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
diag(response.matrix) <- diag(response.matrix)+rowSums(bbb)
aaa <- rep(0,12)
bbb <- c()
for(i in bin6){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
diag(response.matrix) <- diag(response.matrix)+rowSums(bbb)
aaa <- rep(0,12)
bbb <- c()
for(i in bin7){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
diag(response.matrix) <- diag(response.matrix)+rowSums(bbb)
aaa <- rep(0,12)
bbb <- c()
for(i in bin8){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
diag(response.matrix) <- diag(response.matrix)+rowSums(bbb)
aaa <- rep(0,12)
bbb <- c()
for(i in bin9){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
diag(response.matrix) <- diag(response.matrix)+rowSums(bbb)
aaa <- rep(0,12)
bbb <- c()
for(i in bin10){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
diag(response.matrix) <- diag(response.matrix)+rowSums(bbb)
aaa <- rep(0,12)
bbb <- c()
for(i in bin11){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
diag(response.matrix) <- diag(response.matrix)+rowSums(bbb)
aaa <- rep(0,12)
bbb <- c()
for(i in bin12){
pp <- bin.prob2(i)
pp <- pp[,5]
aaa <- aaa+pp
bbb <- cbind(bbb,pp)
}
diag(response.matrix) <- diag(response.matrix)+rowSums(bbb)
diag(response.matrix) <- scale(diag(response.matrix),center=F)
require(knitr)
diag(response.matrix) <- diag(response.matrix)/sum(diag(response.matrix))
kable((response.matrix))
| 0.0642487 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 0.0000000 | 0.0756477 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 0.0000000 | 0.0000000 | 0.0818653 | 0.0000000 | 0.0000000 | 0.0000000 | 0.000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 0.0000000 | 0.0000000 | 0.0000000 | 0.0777202 | 0.0000000 | 0.0000000 | 0.000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0911917 | 0.0000000 | 0.000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0891192 | 0.000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.080829 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.000000 | 0.0829016 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.000000 | 0.0000000 | 0.0777202 | 0.0000000 | 0.0000000 | 0.0000000 |
| 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.000000 | 0.0000000 | 0.0000000 | 0.1025907 | 0.0000000 | 0.0000000 |
| 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0860104 | 0.0000000 |
| 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0901554 |