require(tidyverse)
m.inv <- seq(2.1,4,0.1)
counts <- c(69,41,43,46,43,30,44,36,61,72,70,74,53,37,26,20,14,16,19,11)
dat <- c()
for(mass in m.inv){
dat <- c(dat,rep(m.inv,counts))
}
hist(dat,breaks = c(2,m.inv),right=TRUE,col="blue",border="orange",include.lowest = T,main = "Uncorrected Invariant Mass Spectrum in the J/Psi region",xlab="Invariant Mass (GeV/c^2)\n Signal region indicated with arrows",ylab = "dN/dm")
abline(v=3.097,col="red",lwd=10)
arrows(x0=2.8,y0=500,x1=3.4,y1=500,code=3,col="red",lwd=3)
text("Expected Invariant Mass=3.097 GeV",col="white",cex=1.5,x=3.1,y=100)
framed.dat <- as.data.frame(cbind(m.inv,counts))
signal.reg <- framed.dat[8:14,]
noise.reg <- rbind(framed.dat[1:7,],framed.dat[15:20,])
plot(signal.reg,type="p",col="red",cex=2,xlim=c(1,5),ylim=c(0,80),pch=19,xlab="", ylab="",main="Linear model of background region (blue line) \noverplotted with actual data points",sub="Note: Red circles are measurements that fall into \nwhat we've defined as the signal region (2.8-3.4GeV/c^2)")
points(noise.reg,type="p",col="blue",cex=1,pch=16)
bg.mod <- lm(noise.reg$counts~noise.reg$m.inv)
abline(bg.mod,col="blue")
stand.dev <- sigma(bg.mod)
stand.err <- coef(summary(bg.mod))[[2,2]]
#R SQUARED IS:
summary(bg.mod)$r.squared
## [1] 0.823606
#ADJUSTED R SQUARED IS:
summary(bg.mod)$adj.r.squared
## [1] 0.8075702
#F-STATISTIC IS:
summary(bg.mod)$fstatistic[[1]]
## [1] 51.3604
require(pander)
pander(bg.mod)
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 96.01 | 9.101 | 10.55 | 4.321e-07 |
| noise.reg$m.inv | -21.02 | 2.933 | -7.167 | 1.829e-05 |
An analysis of the linear model describing the background region (tabulated above), yields the following observations:
hist(bg.mod$residuals,col="lightblue", main = "Histogram of distribution of residuals",xlab="Residuals")
predicted.points <- c()
for(point in m.inv){
ans <- coefficients(bg.mod)[[1]] + (coefficients(bg.mod)[[2]]*point)
predicted.points <- c(predicted.points,ans)
}
signal <- c(rep(F,7),rep(T,7),rep(F,6))
framed.dat <- cbind(framed.dat,predicted.points,signal)
backgr.in.sign <- framed.dat %>%
filter(signal==T)
backgr.in.sign <- backgr.in.sign %>%
mutate(minus.background=counts-predicted.points)
plot(framed.dat$m.inv,framed.dat$counts,pch=16,col="blue",ylim = c(min(framed.dat$predicted.points)-(5*stand.dev),max(framed.dat$predicted.points)+(5*stand.dev)),main = "Predicted Data Points (green),\nvs. Observed Data Points (red in signal region,\n blue in background region)",xlab = "",ylab = "",sub="NOTE: Purple points indicate 5 standard deviations \nabove expected background in signal region")
points(backgr.in.sign$m.inv,backgr.in.sign$counts,pch=16,col="red")
points(framed.dat$m.inv,framed.dat$predicted.points,pch=16,col="green")
points(x=backgr.in.sign$m.inv, y=backgr.in.sign$predicted.points+(5*stand.dev),pch=16,col="purple",cex=0.7)
arrows(x0=backgr.in.sign$m.inv, y0=backgr.in.sign$predicted.points+(5*stand.dev)+stand.err, x1=backgr.in.sign$m.inv, y1=backgr.in.sign$predicted.points+(5*stand.dev)-stand.err, length=0.05, angle=90, code=3,col="purple")
In the above plot, green points indicate expected counts due to background, red points indicate number of observations per bin in the signal region, and blue points indicate number of observations per bin outside the signal region, the purple points indicate where data will be 5 standard deviations above background in the signal region (error bars are the standard error on the coefficient measurement)
We multiple the standard error by three, effectively summing over the uncertainty for each of the three observations used in our final prediction, to get the confidence interval.
num.j.psi <- sum(framed.dat$counts[10:12])-sum(framed.dat$predicted.points[10:12])
conf <- stand.err*3
num.j.psi-conf
## [1] 114.6668
num.j.psi+conf
## [1] 132.266