The problems are below. Please fill in your repsonses either in the code chunck or where indicated.
Due Date: Tuesday September 24, 2019 by 11:55p.m. EST.
This assignment is worth 100 points. Each problem is worth 8 points each, except the last problem (#3.8b) which is worth 4 points.
Sumbission Instructions: Knit this document, with all your responses, to a .HTML file. Save the .HTML file as Homework#1_your_last_name and upload the HTML file to the Homework #1 link on Moodle under the Week #3 module.
p. 110 #3.3a, b, c
#part a
library("vcd"); library("grid")
## Warning: package 'vcd' was built under R version 3.5.3
## Loading required package: grid
data(WomenQueue)
barplot(WomenQueue, main="WomenQueue Frequency Distribution",xlab="Numbers")
#part b
data("WomenQueue")
WQ.fit=goodfit(WomenQueue,type ="nbinomial")
plot(WQ.fit,main = "negative binomial")
p. 110 #3.4 a, b, c
#part a
data("Saxony",package = "vcd")
Sa.fit=goodfit(Saxony,type = "binomial",par = list(prob=0.5, size=12))
summary(Sa.fit)
## Warning in summary.goodfit(Sa.fit): Chi-squared approximation may be
## incorrect
##
## Goodness-of-fit test for binomial distribution
##
## X^2 df P(> X^2)
## Pearson 249.1954 12 2.013281e-46
## Likelihood Ratio 205.4060 12 2.493625e-37
ratio=205.4060/12
ratio
## [1] 17.11717
# pvalue is less than chi-square so I fail to reject null hypothes. And my conclusion is the variables are independent.
#part b
Sa.fit = goodfit(Saxony, type = "binomial", par = list(prob = 0.5, size = 12))
Sa.fit
##
## Observed and fitted values for binomial distribution
## with fixed parameters
##
## count observed fitted pearson residual
## 0 3 1.49292 1.2334401
## 1 24 17.91504 1.4376359
## 2 104 98.53271 0.5507842
## 3 286 328.44238 -2.3419098
## 4 670 738.99536 -2.5380434
## 5 1033 1182.39258 -4.3445838
## 6 1343 1379.45801 -0.9816094
## 7 1112 1182.39258 -2.0471328
## 8 829 738.99536 3.3108845
## 9 478 328.44238 8.2523747
## 10 181 98.53271 8.3079041
## 11 45 17.91504 6.3991064
## 12 7 1.49292 4.5071617
unlist(Sa.fit$par)
## prob size
## 0.5 12.0
summary(Sa.fit)
## Warning in summary.goodfit(Sa.fit): Chi-squared approximation may be
## incorrect
##
## Goodness-of-fit test for binomial distribution
##
## X^2 df P(> X^2)
## Pearson 249.1954 12 2.013281e-46
## Likelihood Ratio 205.4060 12 2.493625e-37
Sa.fit1=goodfit(Saxony, type = "binomial", par = list(size = 12))
Sa.fit1
##
## Observed and fitted values for binomial distribution
## with parameters estimated by `ML'
##
## count observed fitted pearson residual
## 0 3 0.9328394 2.1402809
## 1 24 12.0888374 3.4257991
## 2 104 71.8031709 3.7996298
## 3 286 258.4751335 1.7120476
## 4 670 628.0550119 1.6737139
## 5 1033 1085.2107008 -1.5849023
## 6 1343 1367.2793552 -0.6566116
## 7 1112 1265.6303069 -4.3184059
## 8 829 854.2466464 -0.8637977
## 9 478 410.0125627 3.3576088
## 10 181 132.8357027 4.1789562
## 11 45 26.0824586 3.7041659
## 12 7 2.3472734 3.0368664
unlist(Sa.fit1$par)
## prob size
## 0.519215 12.000000
summary(Sa.fit1)
##
## Goodness-of-fit test for binomial distribution
##
## X^2 df P(> X^2)
## Likelihood Ratio 97.0065 11 6.978187e-16
#part c
plot(goodfit(Saxony, type = "binomial", par = list(prob = 0.5, size = 12)),xlab="Number of Males", main = 'Probability = 0.5', ylab = 'SquareRoot of Frequency')
plot(goodfit(Saxony, type = "binomial", par = list(size = 12)),main='Probability =0.519215 (estimated from data)', xlab ='Number of Males', ylab ='SquareRoot of Frequency')
p. 110 #3.6 a, b, d
#part a
count= 0:5
Freq=c(129, 83, 20, 9, 5, 1)
df=data.frame(count,Freq)
df.tab <- xtabs(Freq ~ count, df)
head(df.tab)
## count
## 0 1 2 3 4 5
## 129 83 20 9 5 1
#part b
PoisModel.fit <- goodfit(df.tab, type = "poisson")
plot(PoisModel.fit, type = "standing", xlab="count", main = "Poisson Model")
p. 111 #3.7 a, b
#part a
library("vcdExtra")
## Warning: package 'vcdExtra' was built under R version 3.5.3
## Loading required package: gnm
## Warning: package 'gnm' was built under R version 3.5.3
data("Geissler")
boys=0:11
Freq1=c(8,72,275,837,1540,2161,2310,1801,1077,492,93,24)
df2=data.frame(boys,Freq1)
df2.tab<-xtabs(Freq1~boys,df2)
#part b
Geis.fit = goodfit(df2.tab, type="binomial")
## Warning in goodfit(df2.tab, type = "binomial"): size was not given, taken
## as maximum count
Geis.fit
##
## Observed and fitted values for binomial distribution
## with parameters estimated by `ML'
##
## count observed fitted pearson residual
## 0 8 3.561571 2.3518439
## 1 72 41.947913 4.6400157
## 2 275 224.572436 3.3650364
## 3 837 721.362873 4.3054683
## 4 1540 1544.755904 -0.1210049
## 5 2161 2315.602347 -3.2128030
## 6 2310 2479.362698 -3.4013219
## 7 1801 1896.217320 -2.1866129
## 8 1077 1015.159294 1.9409187
## 9 492 362.317259 6.8129887
## 10 93 77.588097 1.7496804
## 11 24 7.552287 5.9850291
plot(Geis.fit, type = "standing", xlab="boys", main = "binomial Model")
Is there an indication that the binomial does not fit these data? #Answer: Yes, there are some deviation that indicates the binomial does not fit these data well.Like group 9.
p. 111 #3.8 a, b
#part a
data("Bundesliga", package="vcd")
BL1995 =xtabs(~HomeGoals + AwayGoals, data=Bundesliga, subset= Year==1995)
BL1995
## AwayGoals
## HomeGoals 0 1 2 3 4 5 6
## 0 26 16 13 5 0 1 0
## 1 19 58 20 5 4 0 1
## 2 27 23 20 5 1 1 1
## 3 14 11 10 4 2 0 0
## 4 3 5 3 0 0 0 0
## 5 4 1 0 1 0 0 0
## 6 1 0 0 1 0 0 0
bund.df = as.data.frame(BL1995, stringsAsFactors=FALSE)
bund.df = within(bund.df,
{
HomeGoals = as.numeric(HomeGoals) # make numeric
AwayGoals = as.numeric(AwayGoals) # make numeric
TotalGoals = HomeGoals + AwayGoals # total goals
})
str(bund.df)
## 'data.frame': 49 obs. of 4 variables:
## $ HomeGoals : num 0 1 2 3 4 5 6 0 1 2 ...
## $ AwayGoals : num 0 0 0 0 0 0 0 1 1 1 ...
## $ Freq : int 26 19 27 14 3 4 1 16 58 23 ...
## $ TotalGoals: num 0 1 2 3 4 5 6 1 2 3 ...
#part b
bund.df.tab<-xtabs(Freq~HomeGoals,bund.df)
bund.df2.tab<-xtabs(Freq~AwayGoals,bund.df)
bund.df3.tab<-xtabs(Freq~TotalGoals,bund.df)
Geis.fit1 = goodfit(bund.df.tab, type="poisson")
plot(Geis.fit1, type = "standing", xlab="HomeGoals", main = "Poisson Model")
Geis.fit2 = goodfit(bund.df2.tab, type="poisson")
plot(Geis.fit2, type = "standing", xlab="AwayGoals", main = "Poisson Model")
Geis.fit3 = goodfit(bund.df3.tab, type="poisson")
plot(Geis.fit3, type = "standing", xlab="TotalGoals", main = "Poisson Model")
Answer: Poisson provides the reasonable fit on HomeGoals and AwayGoals, but not well fit on TotalGoals