Homework #1

The problems are below. Please fill in your repsonses either in the code chunck or where indicated.

Due Date: Tuesday September 24, 2019 by 11:55p.m. EST.

This assignment is worth 100 points. Each problem is worth 8 points each, except the last problem (#3.8b) which is worth 4 points.

Sumbission Instructions: Knit this document, with all your responses, to a .HTML file. Save the .HTML file as Homework#1_your_last_name and upload the HTML file to the Homework #1 link on Moodle under the Week #3 module.

p. 110 #3.3a, b, c

#part a
library("vcd"); library("grid")

## Warning: package 'vcd' was built under R version 3.5.3

## Loading required package: grid

data(WomenQueue)
barplot(WomenQueue, main="WomenQueue Frequency Distribution",xlab="Numbers")

#part b
data("WomenQueue")
WQ.fit=goodfit(WomenQueue,type ="nbinomial")
plot(WQ.fit,main = "negative binomial")

p. 110 #3.4 a, b, c

#part a
data("Saxony",package = "vcd")
Sa.fit=goodfit(Saxony,type = "binomial",par = list(prob=0.5, size=12))
summary(Sa.fit)

## Warning in summary.goodfit(Sa.fit): Chi-squared approximation may be
## incorrect

## 
##   Goodness-of-fit test for binomial distribution
## 
##                       X^2 df     P(> X^2)
## Pearson          249.1954 12 2.013281e-46
## Likelihood Ratio 205.4060 12 2.493625e-37

ratio=205.4060/12
ratio

## [1] 17.11717

# pvalue is less than chi-square so I fail to reject null hypothes. And my conclusion is the variables are independent.

#part b
Sa.fit = goodfit(Saxony, type = "binomial", par = list(prob = 0.5, size = 12))
Sa.fit

## 
## Observed and fitted values for binomial distribution
## with fixed parameters 
## 
##  count observed     fitted pearson residual
##      0        3    1.49292        1.2334401
##      1       24   17.91504        1.4376359
##      2      104   98.53271        0.5507842
##      3      286  328.44238       -2.3419098
##      4      670  738.99536       -2.5380434
##      5     1033 1182.39258       -4.3445838
##      6     1343 1379.45801       -0.9816094
##      7     1112 1182.39258       -2.0471328
##      8      829  738.99536        3.3108845
##      9      478  328.44238        8.2523747
##     10      181   98.53271        8.3079041
##     11       45   17.91504        6.3991064
##     12        7    1.49292        4.5071617

unlist(Sa.fit$par)

## prob size 
##  0.5 12.0

summary(Sa.fit)

## Warning in summary.goodfit(Sa.fit): Chi-squared approximation may be
## incorrect

## 
##   Goodness-of-fit test for binomial distribution
## 
##                       X^2 df     P(> X^2)
## Pearson          249.1954 12 2.013281e-46
## Likelihood Ratio 205.4060 12 2.493625e-37

Sa.fit1=goodfit(Saxony, type = "binomial", par = list(size = 12))
Sa.fit1

## 
## Observed and fitted values for binomial distribution
## with parameters estimated by `ML' 
## 
##  count observed       fitted pearson residual
##      0        3    0.9328394        2.1402809
##      1       24   12.0888374        3.4257991
##      2      104   71.8031709        3.7996298
##      3      286  258.4751335        1.7120476
##      4      670  628.0550119        1.6737139
##      5     1033 1085.2107008       -1.5849023
##      6     1343 1367.2793552       -0.6566116
##      7     1112 1265.6303069       -4.3184059
##      8      829  854.2466464       -0.8637977
##      9      478  410.0125627        3.3576088
##     10      181  132.8357027        4.1789562
##     11       45   26.0824586        3.7041659
##     12        7    2.3472734        3.0368664

unlist(Sa.fit1$par)

##      prob      size 
##  0.519215 12.000000

summary(Sa.fit1)

## 
##   Goodness-of-fit test for binomial distribution
## 
##                      X^2 df     P(> X^2)
## Likelihood Ratio 97.0065 11 6.978187e-16

#part c
plot(goodfit(Saxony, type = "binomial", par = list(prob = 0.5, size = 12)),xlab="Number of Males", main = 'Probability = 0.5', ylab = 'SquareRoot of Frequency')

plot(goodfit(Saxony, type = "binomial", par = list(size = 12)),main='Probability =0.519215 (estimated from data)', xlab ='Number of Males', ylab ='SquareRoot of Frequency')

p. 110 #3.6 a, b, d

#part a
count= 0:5
Freq=c(129, 83, 20, 9, 5, 1)
df=data.frame(count,Freq)
df.tab <- xtabs(Freq ~ count, df)
head(df.tab)

## count
##   0   1   2   3   4   5 
## 129  83  20   9   5   1

#part b
PoisModel.fit <- goodfit(df.tab, type = "poisson")
plot(PoisModel.fit, type = "standing", xlab="count", main = "Poisson Model")

What do you conclude? Answer: There are lots of deviations from the actual value

p. 111 #3.7 a, b

#part a
library("vcdExtra")

## Warning: package 'vcdExtra' was built under R version 3.5.3

## Loading required package: gnm

## Warning: package 'gnm' was built under R version 3.5.3

data("Geissler")
boys=0:11
Freq1=c(8,72,275,837,1540,2161,2310,1801,1077,492,93,24)
df2=data.frame(boys,Freq1)
df2.tab<-xtabs(Freq1~boys,df2)

#part b
Geis.fit = goodfit(df2.tab, type="binomial")

## Warning in goodfit(df2.tab, type = "binomial"): size was not given, taken
## as maximum count

Geis.fit

## 
## Observed and fitted values for binomial distribution
## with parameters estimated by `ML' 
## 
##  count observed      fitted pearson residual
##      0        8    3.561571        2.3518439
##      1       72   41.947913        4.6400157
##      2      275  224.572436        3.3650364
##      3      837  721.362873        4.3054683
##      4     1540 1544.755904       -0.1210049
##      5     2161 2315.602347       -3.2128030
##      6     2310 2479.362698       -3.4013219
##      7     1801 1896.217320       -2.1866129
##      8     1077 1015.159294        1.9409187
##      9      492  362.317259        6.8129887
##     10       93   77.588097        1.7496804
##     11       24    7.552287        5.9850291

plot(Geis.fit, type = "standing", xlab="boys", main = "binomial Model")

Is there an indication that the binomial does not fit these data? #Answer: Yes, there are some deviation that indicates the binomial does not fit these data well.Like group 9.

p. 111 #3.8 a, b

#part a
data("Bundesliga", package="vcd")
BL1995 =xtabs(~HomeGoals + AwayGoals, data=Bundesliga, subset= Year==1995)
BL1995

##          AwayGoals
## HomeGoals  0  1  2  3  4  5  6
##         0 26 16 13  5  0  1  0
##         1 19 58 20  5  4  0  1
##         2 27 23 20  5  1  1  1
##         3 14 11 10  4  2  0  0
##         4  3  5  3  0  0  0  0
##         5  4  1  0  1  0  0  0
##         6  1  0  0  1  0  0  0

bund.df = as.data.frame(BL1995, stringsAsFactors=FALSE)
bund.df = within(bund.df,
{
HomeGoals = as.numeric(HomeGoals) # make numeric
AwayGoals = as.numeric(AwayGoals) # make numeric
TotalGoals = HomeGoals + AwayGoals # total goals
})
str(bund.df)

## 'data.frame':    49 obs. of  4 variables:
##  $ HomeGoals : num  0 1 2 3 4 5 6 0 1 2 ...
##  $ AwayGoals : num  0 0 0 0 0 0 0 1 1 1 ...
##  $ Freq      : int  26 19 27 14 3 4 1 16 58 23 ...
##  $ TotalGoals: num  0 1 2 3 4 5 6 1 2 3 ...

#part b

bund.df.tab<-xtabs(Freq~HomeGoals,bund.df)
bund.df2.tab<-xtabs(Freq~AwayGoals,bund.df)
bund.df3.tab<-xtabs(Freq~TotalGoals,bund.df)
Geis.fit1 = goodfit(bund.df.tab, type="poisson")
plot(Geis.fit1, type = "standing", xlab="HomeGoals", main = "Poisson Model")

Geis.fit2 = goodfit(bund.df2.tab, type="poisson")
plot(Geis.fit2, type = "standing", xlab="AwayGoals", main = "Poisson Model")

Geis.fit3 = goodfit(bund.df3.tab, type="poisson")
plot(Geis.fit3, type = "standing", xlab="TotalGoals", main = "Poisson Model")

Answer: Poisson provides the reasonable fit on HomeGoals and AwayGoals, but not well fit on TotalGoals

Homework #1

Dan Liu

9/22/19

Homework #1