### Creating frequency table
data <- data.frame(c(0:4), c(2962,382,47,25,4))
names(data) <- c('n', 'frequency' )
print(data)
## n frequency
## 1 0 2962
## 2 1 382
## 3 2 47
## 4 3 25
## 5 4 4
## Make it look neat
library(kableExtra)
library(knitr)
options(knitr.table.format = "html")
kable(data) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = F, position = "left")
|
n
|
frequency
|
|
0
|
2962
|
|
1
|
382
|
|
2
|
47
|
|
3
|
25
|
|
4
|
4
|
## Fit poisson distribution to the data
## This is what dpois() does - available with R already to save your time
poisson <- function(x, lambda) {
probfn <- exp(-lambda) * (lambda ^ x) / factorial(x)
return(probfn)
}
## Prepare some stats
attach(data)
N <- sum(frequency)
RF <- frequency/N
DF <- data.frame(data, round(RF, 5))
MEAN <- sum(RF * n)
VAR <- (sum(n^2*frequency) - N*MEAN^2)/(N-1) # else use (MEAN+MEAN^2/R)
DISP <- VAR/MEAN # over dispersion
THETA <- 1/DISP
R <- MEAN*THETA/(1-THETA) # MEAN = R(1-THETA)/THETA
cbind(MEAN,VAR,DISP,THETA,R)
## MEAN VAR DISP THETA R
## [1,] 0.1657895 0.2237489 1.349596 0.7409623 0.4742312
x = n
(E_poi = round(N * dpois(x, lambda=MEAN),5))
## [1] 2897.50806 480.37634 39.82067 2.20062 0.09121
par(mfrow=c(1,1))
barplot(matrix(c(frequency,E_poi),nr=2, byrow = TRUE), beside=T,
col=c("aquamarine3","coral"),
names.arg=x)
legend("topright", c("Observed","Expected Poisson"), pch=15,
col=c("aquamarine3","coral"),
bty="n")

CT = chisq.test(rbind(frequency,E_poi)); CT
## Warning in chisq.test(rbind(frequency, E_poi)): Chi-squared approximation
## may be incorrect
##
## Pearson's Chi-squared test
##
## data: rbind(frequency, E_poi)
## X-squared = 35.371, df = 4, p-value = 3.898e-07
C = CT$statistic; C
## X-squared
## 35.37065
pVal = CT$p.value
RES = frequency - E_poi
par(mfrow=c(1,3))
plot(x,RES)
SR <- sum(RES)
MSE <- mean(RES^2)
SUMMARY <- matrix(c(SR, C, pVal, MSE), byrow = TRUE)
colnames(SUMMARY) <- c("Poisson Fit")
rownames(SUMMARY) <- c("sum of residuals", "Chi-square Statistic", "p-value", "MSE")
SUMMARY
## Poisson Fit
## sum of residuals 3.100000e-03
## Chi-square Statistic 3.537065e+01
## p-value 3.898172e-07
## MSE 2.884750e+03

### Combining categories
(Actual <- c(2962,382,76))
## [1] 2962 382 76
(Expected <- c(E_poi[1], E_poi[2], sum(E_poi[3],E_poi[4],E_poi[5])))
## [1] 2897.5081 480.3763 42.1125
(combnd <- chisq.test(expChiSq <- rbind(Actual,Expected)))
##
## Pearson's Chi-squared test
##
## data: expChiSq <- rbind(Actual, Expected)
## X-squared = 21.655, df = 2, p-value = 1.985e-05