Problem 13

# Two observed samples; the values of each are listed in ascending order.
hb1 <- c(
  7.2, 7.7, 8, 8.1, 8.3, 8.4, 8.4, 8.5,
  8.6, 8.7, 9.1, 9.1, 9.1, 9.8, 10.1, 10.3
)

hb2 <- c(8.1, 9.2, 10, 10.4, 10.6, 10.9, 11.1, 11.9, 12, 12.1)

# Show the two boxplots side by side.
par(mfrow = c(1, 2))

boxplot(hb1)

boxplot(hb2)

# A normal distribution could be a viable option, with X ~ N(mu, sigma^2).
# mu and sigma^2 are estimated per sample by the sample mean and variance.
# Variable names are plain ASCII (`mu1`, not `mü1`) because whether a
# non-ASCII letter is accepted in an R name depends on the locale.

mu1 <- mean(hb1)
var1 <- var(hb1)

# Evaluate the fitted density on an evenly spaced grid over the observed range
# instead of at the observations themselves: the curve is then smooth and does
# not rely on the data being stored in ascending order.
grid1 <- seq(min(hb1), max(hb1), length.out = 200)
y1 <- dnorm(grid1, mean = mu1, sd = sqrt(var1))

plot(grid1, y1, type = "l", xlab = "hb1", ylab = "fitted normal density")

#--------------------#

mu2 <- mean(hb2)
var2 <- var(hb2)

# Same construction for the second sample.
grid2 <- seq(min(hb2), max(hb2), length.out = 200)
y2 <- dnorm(grid2, mean = mu2, sd = sqrt(var2))

plot(grid2, y2, type = "l", xlab = "hb2", ylab = "fitted normal density")

The assumption I made in b) does not hold up perfectly: in the Q-Q plot the data do not follow the reference line (qqline) exactly, particularly at the lowest and highest quantiles, implying that there might be a better-fitting distribution than the normal one.

# 2 x 2 panel layout, filled row by row: each sample contributes its boxplot
# followed by its normal Q-Q plot with the reference line drawn on top.
par(mfrow = c(2, 2))

for (sample_values in list(hb1, hb2)) {
  boxplot(sample_values)
  qqnorm(sample_values)
  qqline(sample_values)
}

Problem 14

# `Oral` is not defined in this script; presumably it is supplied by the
# 'spam' package loaded here.
library("spam")
## Warning: Paket 'spam' wurde unter R Version 4.1.2 erstellt
## Spam version 2.8-0 (2022-01-05) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
## 
## Attache Paket: 'spam'
## Die folgenden Objekte sind maskiert von 'package:base':
## 
##     backsolve, forwardsolve
# Column-wise summary of the three variables Y, E and SMR.
summary(Oral)
##        Y                E                SMR        
##  Min.   :  1.00   Min.   :  3.011   Min.   :0.1460  
##  1st Qu.:  9.00   1st Qu.: 10.883   1st Qu.:0.7219  
##  Median : 19.00   Median : 19.503   Median :0.9279  
##  Mean   : 28.43   Mean   : 28.430   Mean   :0.9753  
##  3rd Qu.: 33.00   3rd Qu.: 33.217   3rd Qu.:1.1741  
##  Max.   :501.00   Max.   :393.094   Max.   :2.3957
dato <- Oral

# Look at the Y column on its own. Flattening the whole data frame with
# unlist() would concatenate Y, E and SMR into one vector, so the position
# returned by which.max() would match a row number only by coincidence.
# NOTE(review): assumes the largest Y value is what was being looked for.
x_num <- as.numeric(dato[["Y"]])

class(x_num)
## [1] "numeric"
# Row holding the largest Y value (Max. of Y is 501 in the summary above).
which.max(x_num)
## [1] 328
# Interactive help lookup, disabled so that running the script does not open
# a help page:
# ?subset
 

# Y values of the rows whose E value lies in the interval [35, 45].
v1 <- subset(dato, 35 <= E & E <= 45, select = "Y")


# The matching E values, kept only to check that the filter selected the
# intended rows.
vE <- subset(dato, 35 <= E & E <= 45, select = "E")



v_num <- as.numeric(unlist(v1))

# For a Poisson distribution the expected value and the variance both equal
# lambda; the sample mean is used as its estimate.
lambda <- mean(v_num)

lambda
## [1] 36

# Poisson probabilities of the observed counts under the fitted lambda.
# Retained because later code refers to `v_dist`; these are probabilities, not
# quantiles, so they are not suitable as an axis of a Q-Q plot.
v_dist <- dpois(v_num, lambda)

# Q-Q plot of the observed counts against the theoretical Poisson(lambda)
# quantiles at the usual plotting positions. The number of points is taken
# from the data instead of being hard-coded.
n_counts <- length(v_num)
theo_quantiles <- qpois(ppoints(n_counts), lambda)

qqplot(
  theo_quantiles, v_num,
  xlab = "Theoretical Poisson quantiles",
  ylab = "Observed counts"
)
# Points close to the identity line indicate a good fit.
abline(0, 1)

Judging qualitatively, the Poisson distribution is a reasonably good fit.

# Simulate a Poisson sample with the same size and rate as the observed counts
# (taken from `v_num` and `lambda` instead of hard-coded numbers). It serves as
# a visual reference for how much a genuine Poisson sample of this size
# scatters around the identity line.
mydata <- rpois(length(v_num), lambda)

# Theoretical Poisson(lambda) quantiles shared by both panels.
pois_quantiles <- qpois(ppoints(length(v_num)), lambda)

par(mfrow = c(1, 2))

# Left panel: simulated sample against the theoretical quantiles.
qqplot(
  pois_quantiles, mydata,
  main = "Simulated",
  xlab = "Theoretical Poisson quantiles",
  ylab = "Sample quantiles"
)
abline(0, 1)

# Right panel: observed counts against the same theoretical quantiles.
qqplot(
  pois_quantiles, v_num,
  main = "Observed",
  xlab = "Theoretical Poisson quantiles",
  ylab = "Sample quantiles"
)
abline(0, 1)

# Work with plain numeric vectors instead of one-column data frames, so that
# var() returns a scalar rather than a 1 x 1 matrix.
Yi <- dato[["Y"]]
Ei <- dato[["E"]]

# Ratio Y / E, one value per row of `dato`.
Zi <- Yi / Ei

# Number of ratios, taken from the data instead of a hard-coded 544.
n_ratio <- length(Zi)

zbar <- mean(Zi)
zbar
## [1] 0.9752946
# var() already uses the n - 1 denominator, i.e. it is the sample variance.
sigma2 <- var(Zi)

sigma <- sqrt(sigma2)

# Approximate 95% confidence interval for the mean:
# zbar -/+ 1.96 standard errors.
half_width <- 1.96 * sigma / sqrt(n_ratio)
CI.r <- zbar + half_width
CI.l <- zbar - half_width

# Show both limits as a named vector (table() would build a contingency table
# of the two numbers rather than display them).
c(lower = CI.l, upper = CI.r)
##     lower     upper
## 0.9457072 1.0048819

The bootstrap resulted in a value similar to the one obtained in e).

Zi_num <- as.numeric(unlist(Zi))

# Nonparametric bootstrap of the mean: draw `n_boot` resamples, each of the
# same size as the data and with replacement, and record the mean of each.
# NOTE(review): the original 1000 is interpreted as the number of bootstrap
# replicates, not as the size of a single resample.
n_boot <- 1000
boot_means <- replicate(
  n_boot,
  mean(sample(Zi_num, length(Zi_num), replace = TRUE))
)

# Bootstrap estimate of the mean (average of the replicate means).
boots <- mean(boot_means)

boots

# Percentile 95% interval of the bootstrap means. The printed numbers vary
# from run to run because the resampling is random.
quantile(boot_means, probs = c(0.025, 0.975))