Project Description

Recreate dataset and visualize

require(tidyverse)

m.inv <- seq(2.1,4,0.1)
counts <- c(69,41,43,46,43,30,44,36,61,72,70,74,53,37,26,20,14,16,19,11)

dat <- c()

for(mass in m.inv){
  dat <- c(dat,rep(m.inv,counts))
  
}
hist(dat,breaks = c(2,m.inv),right=TRUE,col="blue",border="orange",include.lowest = T,main = "Uncorrected Invariant Mass Spectrum in the J/Psi region",xlab="Invariant Mass (GeV/c^2)\n Signal region indicated with arrows",ylab = "dN/dm")
abline(v=3.097,col="red",lwd=10)
arrows(x0=2.8,y0=500,x1=3.4,y1=500,code=3,col="red",lwd=3)
text("Expected Invariant Mass=3.097 GeV",col="white",cex=1.5,x=3.1,y=100)

Linear model

framed.dat <- as.data.frame(cbind(m.inv,counts))

signal.reg <- framed.dat[8:14,]

noise.reg <- rbind(framed.dat[1:7,],framed.dat[15:20,])

plot(signal.reg,type="p",col="red",cex=2,xlim=c(1,5),ylim=c(0,80),pch=19,xlab="", ylab="",main="Linear model of background region (blue line) \noverplotted with actual data points",sub="Note: Red circles are measurements that fall into \nwhat we've defined as the signal region (2.8-3.4GeV/c^2)")
points(noise.reg,type="p",col="blue",cex=1,pch=16)


bg.mod <- lm(noise.reg$counts~noise.reg$m.inv)
abline(bg.mod,col="blue")

stand.dev <- sigma(bg.mod)
stand.err <- coef(summary(bg.mod))[[2,2]]

Analysis of model accuracy

#R SQUARED IS:
summary(bg.mod)$r.squared
## [1] 0.823606
#ADJUSTED R SQUARED IS:
summary(bg.mod)$adj.r.squared
## [1] 0.8075702
#F-STATISTIC IS:
summary(bg.mod)$fstatistic[[1]]
## [1] 51.3604
require(pander)
pander(bg.mod)
Fitting linear model: noise.reg\(counts ~ noise.reg\)m.inv
  Estimate Std. Error t value Pr(>|t|)
(Intercept) 96.01 9.101 10.55 4.321e-07
noise.reg$m.inv -21.02 2.933 -7.167 1.829e-05

An analysis of the linear model describing the background region (tabulated above), yields the following observations:

hist(bg.mod$residuals,col="lightblue", main = "Histogram of distribution of residuals",xlab="Residuals")

We also find that:

  • Standard errors on the coefficients are low, relative to magnitude of coefficient values
  • Both coefficients are significant at the p << 0.01 level
  • Adjusted R-squared suggests that ≈ 80% of the variation in the background-region data is explained by the linear model
  • F-statistic is relatively large at 11 degrees of freedom (p<<0.01)

Looking only at statistically significant observations (>5σ)

predicted.points <- c()

for(point in m.inv){
  ans <- coefficients(bg.mod)[[1]] + (coefficients(bg.mod)[[2]]*point)
  predicted.points <- c(predicted.points,ans)
}


signal <- c(rep(F,7),rep(T,7),rep(F,6))

framed.dat <- cbind(framed.dat,predicted.points,signal)

backgr.in.sign <- framed.dat %>%
  filter(signal==T)
backgr.in.sign <- backgr.in.sign %>%
  mutate(minus.background=counts-predicted.points)

plot(framed.dat$m.inv,framed.dat$counts,pch=16,col="blue",ylim = c(min(framed.dat$predicted.points)-(5*stand.dev),max(framed.dat$predicted.points)+(5*stand.dev)),main = "Predicted Data Points (green),\nvs. Observed Data Points (red in signal region,\n blue in background region)",xlab = "",ylab = "",sub="NOTE: Purple points indicate 5 standard deviations \nabove expected background in signal region")

points(backgr.in.sign$m.inv,backgr.in.sign$counts,pch=16,col="red")
points(framed.dat$m.inv,framed.dat$predicted.points,pch=16,col="green")
points(x=backgr.in.sign$m.inv, y=backgr.in.sign$predicted.points+(5*stand.dev),pch=16,col="purple",cex=0.7)

arrows(x0=backgr.in.sign$m.inv, y0=backgr.in.sign$predicted.points+(5*stand.dev)+stand.err, x1=backgr.in.sign$m.inv, y1=backgr.in.sign$predicted.points+(5*stand.dev)-stand.err, length=0.05, angle=90, code=3,col="purple")

num.j.psi <- sum(framed.dat$counts[10:12])-sum(framed.dat$predicted.points[10:12])
conf <- stand.err*3

num.j.psi-conf
## [1] 114.6668
num.j.psi+conf
## [1] 132.266

Conclusion: