library("HistData")
library("plotly")
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
data = Arbuthnot
plot(data$Year, data$Ratio, pch = 16, col = "blue", cex = 0.8,
main = "Male Births / Femail Births over Year",
xlab = "Year", ylab = "Ratio"
)
trend = lm(data$Ratio ~ data$Year)
abline(trend)
## The feature that stood out was that the male to female birth rate flutuates over the years, but it was consistently above the reference point of 1, which means in all the years this data range covers, there has already been more male birth than female birth. To highlight this feature, a line is added to provide a clear visual on the fact that the ratio has always been higher than 1.
plot(data$Year, data$Total, type = 'n',
main = "Total Births over Year",
xlab = "Year", ylab = "Total (in 000s)")
lines(data$Year, data$Total, type='l')
##Starting from 1640, the total births keep decreasing until around 1660 where the number of total births is only the half of historical peak.From 1660, the total briths start growing quickly until 1965-1966 when the total births jump by almost 30%.After 1685, the total births become stable between 14,000 - 15,000 but experience a big jump in 1704 and recover quickly.
library('vcd')
## Warning: package 'vcd' was built under R version 3.4.3
## Loading required package: grid
data = WomenQueue
fit = goodfit(data, type = "binomial", par = list(size = 10))
fit$par
## $prob
## [1] 0.435
##
## $size
## [1] 10
plot(fit, type = "standing", shade = TRUE)
##(d)Suggest some reasons why the number of women in queues of length 10 might depart from a binomial distribution, Bin(n = 10, p = 1/2).
fit2 = goodfit(data, type = "binomial", par = list(prob = .5,size = 10))
plot(fit2, type = "deviation", shade = TRUE)
## Warning in max.c * pmax(pmin(interpolate(abs(res)), 1), 0): longer object
## length is not a multiple of shorter object length
## Warning in max.c * pmax(pmin(interpolate(abs(res)), 1), 0): longer object
## length is not a multiple of shorter object length
##Possible Reason can be:(a) The sample size is relatively small which might not be able to fully represent the real distribution;(b) The real distribution might not be binomial. i.e. Female is less likely get into the queue.
data = Saxony
fit = goodfit(data, type = "binomial", par = list(prob = 0.5, size = 12))
summary(fit)
## Warning in summary.goodfit(fit): Chi-squared approximation may be incorrect
##
## Goodness-of-fit test for binomial distribution
##
## X^2 df P(> X^2)
## Pearson 249.1954 12 2.013281e-46
## Likelihood Ratio 205.4060 12 2.493625e-37
Ratio = 205.4060/12
Ratio
## [1] 17.11717
fit2 = goodfit(data, type = "binomial", par = list(size = 12))
fit2$par
## $prob
## [1] 0.519215
##
## $size
## [1] 12
summary(fit2)
##
## Goodness-of-fit test for binomial distribution
##
## X^2 df P(> X^2)
## Likelihood Ratio 97.0065 11 6.978187e-16
plot(fit, type = "standing", shade = TRUE, main = "Bin(n=12, p=0.5)")
## Warning in max.c * pmax(pmin(interpolate(abs(res)), 1), 0): longer object
## length is not a multiple of shorter object length
plot(fit2, type = "standing", shade = TRUE, main = "Bin(n=12, p=p head)")